A search interface for the Performing Patents Otherwise publication as part of the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office OPS API. https://patents.copim.ac.uk
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

158 lines
6.4KB

  1. # @name: ops.py
  2. # @version: 0.1
  3. # @creation_date: 2022-09-08
  4. # @license: The MIT License <https://opensource.org/licenses/MIT>
  5. # @author: Simon Bowie <simon.bowie.19@gmail.com>
  6. # @purpose: Performs functions against the European Patent Office's Open Patent Services (OPS) API
  7. # @acknowledgements:
  8. # OPS documented at https://www.epo.org/searching-for-patents/data/web-services/ops.html
  9. # OPS RESTful API specification at http://documents.epo.org/projects/babylon/eponet.nsf/0/F3ECDCC915C9BCD8C1258060003AA712/$File/ops_v3.2_documentation_-_version_1.3.18_en.pdf
  10. # OPS API functions list at https://developers.epo.org/ops-v3-2/apis
  11. import os
  12. import requests
  13. import base64
  14. from wand.image import Image
  15. # get config variables from OS environment variables: set in env file passed through Docker Compose
  16. ops_url = os.environ.get('OPS_URL')
  17. ops_url_images = os.environ.get('OPS_URL_IMAGES')
  18. consumer_key = os.environ.get('CONSUMER_KEY')
  19. consumer_secret = os.environ.get('CONSUMER_SECRET')
  20. def get_access_token():
  21. # OPS API credentials (details at http://documents.epo.org/projects/babylon/eponet.nsf/0/F3ECDCC915C9BCD8C1258060003AA712/$File/ops_v3.2_documentation_-_version_1.3.18_en.pdf)
  22. endpoint_url = ops_url + '3.2/auth/accesstoken'
  23. auth = consumer_key + ":" + consumer_secret
  24. auth_bytes = auth.encode("ascii")
  25. base64_bytes = base64.b64encode(auth_bytes)
  26. base64_string = base64_bytes.decode("ascii")
  27. # set up API call
  28. headers = {"Authorization": "Basic " + base64_string, "Content-Type": "application/x-www-form-urlencoded"}
  29. data = "grant_type=client_credentials"
  30. # give back result
  31. response = requests.post(endpoint_url, headers=headers, data=data)
  32. if response.status_code == 200:
  33. # turn the API response into useful Json
  34. json = response.json()
  35. access_token = json['access_token']
  36. return access_token
  37. def get_publication_details(doc_ref):
  38. access_token = get_access_token()
  39. # OPS API credentials (details at http://documents.epo.org/projects/babylon/eponet.nsf/0/F3ECDCC915C9BCD8C1258060003AA712/$File/ops_v3.2_documentation_-_version_1.3.16_en.pdf)
  40. endpoint_url = ops_url + 'rest-services/published-data/publication/docdb/' + doc_ref + '/biblio'
  41. # set up API call
  42. headers = {"Authorization": "Bearer " + access_token, "Accept": "application/json"}
  43. # get result
  44. response = requests.get(endpoint_url, headers=headers)
  45. output = {}
  46. if response.status_code == 200:
  47. # turn the API response into useful Json
  48. json = response.json()
  49. # for each invention title, check if it's in the original language
  50. try:
  51. json['ops:world-patent-data']['exchange-documents']['exchange-document']['bibliographic-data']['invention-title']
  52. invention_titles = json['ops:world-patent-data']['exchange-documents']['exchange-document']['bibliographic-data']['invention-title']
  53. try:
  54. invention_titles[1]
  55. for invention_title in invention_titles:
  56. if invention_title['@lang'] is not None and invention_title['@lang'] != 'en':
  57. output['original_title'] = invention_title['$']
  58. except KeyError:
  59. if invention_titles['@lang'] is not None and invention_titles['@lang'] != 'en':
  60. output['original_title'] = invention_titles['$']
  61. except KeyError:
  62. pass
  63. # for each abstract, check if it's in the original language
  64. try:
  65. json['ops:world-patent-data']['exchange-documents']['exchange-document']['abstract']
  66. abstracts = json['ops:world-patent-data']['exchange-documents']['exchange-document']['abstract']
  67. try:
  68. abstracts[1]
  69. for abstract in abstracts:
  70. if abstract['@lang'] is not None and abstract['@lang'] != 'en':
  71. output['original_abstract'] = abstract['p']['$']
  72. except KeyError:
  73. if abstracts['@lang'] is not None and abstracts['@lang'] != 'en':
  74. output['original_abstract'] = abstracts['p']['$']
  75. except KeyError:
  76. pass
  77. return output
  78. def get_images(doc_ref):
  79. access_token = get_access_token()
  80. # OPS API credentials (details at http://documents.epo.org/projects/babylon/eponet.nsf/0/F3ECDCC915C9BCD8C1258060003AA712/$File/ops_v3.2_documentation_-_version_1.3.16_en.pdf)
  81. endpoint_url = ops_url + 'rest-services/published-data/publication/docdb/' + doc_ref + '/images'
  82. # set up API call
  83. headers = {"Authorization": "Bearer " + access_token, "Accept": "application/json"}
  84. # give back result
  85. response = requests.get(endpoint_url, headers=headers)
  86. if response.status_code == 200:
  87. output = {}
  88. drawings_url = {}
  89. # turn the API response into useful Json
  90. json = response.json()
  91. try:
  92. json['ops:world-patent-data']['ops:document-inquiry']['ops:inquiry-result']['ops:document-instance']
  93. document_instances = json['ops:world-patent-data']['ops:document-inquiry']['ops:inquiry-result']['ops:document-instance']
  94. try:
  95. document_instances[0]
  96. for document_instance in document_instances:
  97. if document_instance['@desc'] == 'Drawing':
  98. drawings_url = ops_url_images + '3.2/rest-services/' + document_instance['@link'] + '?Range=1'
  99. if drawings_url is None:
  100. for document_instance in document_instances:
  101. if document_instance['@desc'] == 'FullDocument':
  102. drawings_url = ops_url_images + '3.2/rest-services/' + document_instance['@link'] + '?Range=1'
  103. except KeyError:
  104. pass
  105. try:
  106. drawings_url[0]
  107. # set up API call
  108. headers = {"Authorization": "Bearer " + access_token, "Accept": "application/tiff"}
  109. # give back result
  110. response = requests.get(drawings_url, headers=headers)
  111. if response.status_code == 200:
  112. with Image(blob = response.content) as image:
  113. png_blob = image.make_blob('png')
  114. base64_bytes = base64.b64encode(png_blob)
  115. output['image'] = base64_bytes.decode("ascii")
  116. except KeyError:
  117. pass
  118. except KeyError:
  119. pass
  120. else:
  121. output = False
  122. return output