A search interface for data from the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office API. https://patents.copim.ac.uk
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

132 lines
4.7KB

  1. <?php
  /**
   * Search the Solr core's `content` field for the given terms.
   *
   * The Solr host, port and core name are read from $_ENV
   * (SOLR_HOSTNAME, SOLR_PORT, SOLR_CORE). Terms are combined with OR
   * (q.op=OR) and the response is requested as JSON.
   *
   * @param string $search Search terms, either raw or already URL-encoded.
   * @return array|string The value returned by parse_results() for the matching
   *                      documents, or the string 'No results found' when Solr
   *                      reports zero hits, cannot be reached, or returns
   *                      something that is not the expected JSON.
   */
  function solr_search($search){
      // Normalise the term to exactly one layer of URL encoding. Decoding first
      // means a caller that already passes an encoded string is not encoded
      // twice, while a raw term containing spaces, '&' or '#' can no longer
      // break the URL or smuggle extra parameters into the Solr request.
      $query = urlencode(urldecode((string) $search));

      // Solr's query syntax: https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $_ENV["SOLR_CORE"] . '/select?q.op=OR&q=content%3A' . $query . '&wt=json';

      // Perform the request against the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      // The error text has to be read before the handle is closed
      $curl_error = curl_error($ch);
      curl_close($ch);

      // curl_exec() returns false when the transfer itself fails
      if (!is_string($response)) {
          error_log('solr_search: Solr request failed: ' . $curl_error);
          return 'No results found';
      }

      // json_decode() returns null for malformed JSON; isset() below copes with
      // that (and with a missing `response` section) without raising warnings
      $json = json_decode($response);
      if (!isset($json->response->numFound, $json->response->docs)
          || (int) $json->response->numFound === 0) {
          return 'No results found';
      }

      return parse_results($json);
  }
  /**
   * Return the first capture group of $pattern within $content.
   *
   * @param string $pattern PCRE pattern containing at least one capture group.
   * @param string $content Text to search.
   * @return string|null The captured text, or null when the pattern does not match.
   */
  function patent_first_capture($pattern, $content){
      if (preg_match($pattern, $content, $matches) === 1 && isset($matches[1])) {
          return $matches[1];
      }
      return null;
  }

  /**
   * Pull the displayable fields out of one document's `content` text.
   *
   * Each labelled field is taken from the line that follows the first line
   * containing its label. The abstract falls back to the text that follows a
   * parenthesised single character and a blank line when no "Abstract" label
   * is present.
   *
   * @param mixed $content The document's content text; any non-string value is
   *                       treated as empty.
   * @return array Keys doc_ref, application_id, epo_publication_url,
   *               ipc_publication_url, title and abstract, in that order.
   *               doc_ref is '' when absent; every other field is null when
   *               its pattern does not match.
   */
  function extract_patent_fields($content){
      if (!is_string($content)) {
          $content = '';
      }

      // Document reference number (used for the OPS API): three
      // whitespace-separated tokens after "=D", with the spaces removed
      $doc_ref = patent_first_capture('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $content);

      $abstract = patent_first_capture('/Abstract.*\n(.*)\n/', $content);
      if ($abstract === null) {
          // Some records carry no "Abstract" label; use the fallback layout
          $abstract = patent_first_capture('/\(.\) \n\n(.*)\n/', $content);
      }

      return [
          'doc_ref' => str_replace(' ', '', (string) $doc_ref),
          'application_id' => patent_first_capture('/Application.*\n(.*)\n/', $content),
          'epo_publication_url' => patent_first_capture('/Publication.*\n(.*)\n/', $content),
          'ipc_publication_url' => patent_first_capture('/IPC.*\n(.*)\n/', $content),
          'title' => patent_first_capture('/Title.*\n(.*)\n/', $content),
          'abstract' => $abstract,
      ];
  }

  /**
   * Convert a decoded Solr response into a list of field arrays.
   *
   * @param object $json_response Decoded Solr JSON; documents are read from
   *                              ->response->docs and their text from ->content.
   * @return array One extract_patent_fields() array per document, in response
   *               order; an empty array when the response holds no documents.
   */
  function parse_results($json_response){
      // Start from an empty list so an empty or missing docs section yields []
      // instead of an undefined variable
      $results = [];
      if (!isset($json_response->response->docs) || !is_array($json_response->response->docs)) {
          return $results;
      }

      foreach ($json_response->response->docs as $result){
          $results[] = extract_patent_fields(isset($result->content) ? $result->content : '');
      }
      return $results;
  }

  /**
   * Fetch one randomly chosen record from Solr.
   *
   * Asks Solr for all documents sorted on a random_<seed> field, then picks one
   * of the returned documents at random. The Solr host, port and core name are
   * read from $_ENV (SOLR_HOSTNAME, SOLR_PORT, SOLR_CORE).
   *
   * @return array The extract_patent_fields() array for the chosen document.
   *               When Solr cannot be reached, returns invalid JSON or returns
   *               no documents, the same keys are returned with empty values.
   */
  function random_record (){
      // Seed for the random sort field
      $random = rand();

      // Solr's query syntax: https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $_ENV["SOLR_CORE"] . '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' . $random . '%20asc';

      // Perform the request against the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      // The error text has to be read before the handle is closed
      $curl_error = curl_error($ch);
      curl_close($ch);

      // curl_exec() returns false when the transfer itself fails
      if (!is_string($response)) {
          error_log('random_record: Solr request failed: ' . $curl_error);
          return extract_patent_fields('');
      }

      // array_rand() must not be given a missing or empty list, so bail out
      // with an empty record first
      $json = json_decode($response);
      if (!isset($json->response->docs)
          || !is_array($json->response->docs)
          || count($json->response->docs) === 0) {
          return extract_patent_fields('');
      }

      // Pick a random document out of the docs array
      $docs = $json->response->docs;
      $picked = $docs[array_rand($docs)];

      return extract_patent_fields(isset($picked->content) ? $picked->content : '');
  }
  95. ?>