A search interface for the Performing Patents Otherwise publication as part of the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office OPS API. https://patents.copim.ac.uk
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ops.py 6.4KB

2 years ago
2 years ago
2 years ago
2 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. # @name: ops.py
  2. # @creation_date: 2022-09-08
  3. # @license: The MIT License <https://opensource.org/licenses/MIT>
  4. # @author: Simon Bowie <simon.bowie.19@gmail.com>
  5. # @purpose: Performs functions against the European Patent Office's Open Patent Services (OPS) API
  6. # @acknowledgements:
  7. # OPS documented at https://www.epo.org/searching-for-patents/data/web-services/ops.html
  8. # OPS RESTful API specification at http://documents.epo.org/projects/babylon/eponet.nsf/0/F3ECDCC915C9BCD8C1258060003AA712/$File/ops_v3.2_documentation_-_version_1.3.18_en.pdf
  9. # OPS API functions list at https://developers.epo.org/ops-v3-2/apis
  10. import os
  11. import requests
  12. import base64
  13. from wand.image import Image
  14. # get config variables from OS environment variables: set in env file passed through Docker Compose
  15. ops_url = os.environ.get('OPS_URL')
  16. ops_url_images = os.environ.get('OPS_URL_IMAGES')
  17. consumer_key = os.environ.get('CONSUMER_KEY')
  18. consumer_secret = os.environ.get('CONSUMER_SECRET')
  19. def get_access_token():
  20. # OPS API credentials (details at http://documents.epo.org/projects/babylon/eponet.nsf/0/F3ECDCC915C9BCD8C1258060003AA712/$File/ops_v3.2_documentation_-_version_1.3.18_en.pdf)
  21. endpoint_url = ops_url + '3.2/auth/accesstoken'
  22. auth = consumer_key + ":" + consumer_secret
  23. auth_bytes = auth.encode("ascii")
  24. base64_bytes = base64.b64encode(auth_bytes)
  25. base64_string = base64_bytes.decode("ascii")
  26. # set up API call
  27. headers = {"Authorization": "Basic " + base64_string, "Content-Type": "application/x-www-form-urlencoded"}
  28. data = "grant_type=client_credentials"
  29. # give back result
  30. response = requests.post(endpoint_url, headers=headers, data=data)
  31. if response.status_code == 200:
  32. # turn the API response into useful Json
  33. json = response.json()
  34. access_token = json['access_token']
  35. return access_token
  36. def get_publication_details(doc_ref):
  37. access_token = get_access_token()
  38. # OPS API credentials (details at http://documents.epo.org/projects/babylon/eponet.nsf/0/F3ECDCC915C9BCD8C1258060003AA712/$File/ops_v3.2_documentation_-_version_1.3.16_en.pdf)
  39. endpoint_url = ops_url + 'rest-services/published-data/publication/docdb/' + doc_ref + '/biblio'
  40. # set up API call
  41. headers = {"Authorization": "Bearer " + access_token, "Accept": "application/json"}
  42. # get result
  43. response = requests.get(endpoint_url, headers=headers)
  44. output = {}
  45. if response.status_code == 200:
  46. # turn the API response into useful Json
  47. json = response.json()
  48. # for each invention title, check if it's in the original language
  49. try:
  50. json['ops:world-patent-data']['exchange-documents']['exchange-document']['bibliographic-data']['invention-title']
  51. invention_titles = json['ops:world-patent-data']['exchange-documents']['exchange-document']['bibliographic-data']['invention-title']
  52. try:
  53. invention_titles[1]
  54. for invention_title in invention_titles:
  55. if invention_title['@lang'] is not None and invention_title['@lang'] != 'en':
  56. output['original_title'] = invention_title['$']
  57. except KeyError:
  58. if invention_titles['@lang'] is not None and invention_titles['@lang'] != 'en':
  59. output['original_title'] = invention_titles['$']
  60. except KeyError:
  61. pass
  62. # for each abstract, check if it's in the original language
  63. try:
  64. json['ops:world-patent-data']['exchange-documents']['exchange-document']['abstract']
  65. abstracts = json['ops:world-patent-data']['exchange-documents']['exchange-document']['abstract']
  66. try:
  67. abstracts[1]
  68. for abstract in abstracts:
  69. if abstract['@lang'] is not None and abstract['@lang'] != 'en':
  70. output['original_abstract'] = abstract['p']['$']
  71. except KeyError:
  72. if abstracts['@lang'] is not None and abstracts['@lang'] != 'en':
  73. output['original_abstract'] = abstracts['p']['$']
  74. except KeyError:
  75. pass
  76. return output
  77. def get_images(doc_ref):
  78. access_token = get_access_token()
  79. # OPS API credentials (details at http://documents.epo.org/projects/babylon/eponet.nsf/0/F3ECDCC915C9BCD8C1258060003AA712/$File/ops_v3.2_documentation_-_version_1.3.16_en.pdf)
  80. endpoint_url = ops_url + 'rest-services/published-data/publication/docdb/' + doc_ref + '/images'
  81. # set up API call
  82. headers = {"Authorization": "Bearer " + access_token, "Accept": "application/json"}
  83. # give back result
  84. response = requests.get(endpoint_url, headers=headers)
  85. if response.status_code == 200:
  86. output = {}
  87. drawings_url = {}
  88. # turn the API response into useful Json
  89. json = response.json()
  90. try:
  91. json['ops:world-patent-data']['ops:document-inquiry']['ops:inquiry-result']['ops:document-instance']
  92. document_instances = json['ops:world-patent-data']['ops:document-inquiry']['ops:inquiry-result']['ops:document-instance']
  93. try:
  94. document_instances[0]
  95. for document_instance in document_instances:
  96. if document_instance['@desc'] == 'Drawing':
  97. drawings_url = ops_url_images + '3.2/rest-services/' + document_instance['@link'] + '?Range=1'
  98. if drawings_url is None:
  99. for document_instance in document_instances:
  100. if document_instance['@desc'] == 'FullDocument':
  101. drawings_url = ops_url_images + '3.2/rest-services/' + document_instance['@link'] + '?Range=1'
  102. except KeyError:
  103. pass
  104. try:
  105. drawings_url[0]
  106. # set up API call
  107. headers = {"Authorization": "Bearer " + access_token, "Accept": "application/tiff"}
  108. # give back result
  109. response = requests.get(drawings_url, headers=headers)
  110. if response.status_code == 200:
  111. with Image(blob = response.content) as image:
  112. png_blob = image.make_blob('png')
  113. base64_bytes = base64.b64encode(png_blob)
  114. output['image'] = base64_bytes.decode("ascii")
  115. except KeyError:
  116. pass
  117. except KeyError:
  118. pass
  119. else:
  120. output = False
  121. return output