A search interface for data from the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office API. https://patents.copim.ac.uk
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

125 lines
4.0KB

  1. <?php
  /**
   * Search the Solr core's `content` field and parse every returned document.
   *
   * The Solr host, port and core are read from $_ENV (SOLR_HOSTNAME, SOLR_PORT,
   * SOLR_CORE). Solr's query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $search Search term, appended verbatim after `q=content:`.
   * @return array List of parse_result() arrays, one per returned document.
   *               Empty when nothing matched, the request failed, or the
   *               response could not be decoded.
   */
  function solr_search($search){
      // NOTE(review): $search is concatenated as-is, so it must already be
      // URL-encoded by the caller -- confirm at the call sites; if they pass raw
      // user input, encode it here with rawurlencode().
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $_ENV["SOLR_CORE"] . '/select?q.op=OR&q=content%3A' . $search . '&wt=json';

      // Perform the cURL request against the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // Always hand back an array so callers can iterate without checking for null
      $results = [];

      // curl_exec() returns false when the transfer itself failed
      if ($response === false) {
          return $results;
      }

      // Decode the API response; anything without a docs list counts as "no results"
      $json = json_decode($response);
      if (!isset($json->response->docs) || !is_array($json->response->docs)) {
          return $results;
      }

      foreach ($json->response->docs as $result) {
          // Skip documents that carry no content field; there is nothing to parse
          if (!isset($result->content)) {
              continue;
          }
          $results[] = parse_result($result->content);
      }

      return $results;
  }
  /**
   * Extract the known fields from the text content of one patent record.
   *
   * Each labelled field is read from the line that follows the first line
   * containing its label (e.g. the line after "Title...").
   *
   * @param string $input Text content of a record; non-string input is treated
   *                      as an empty record.
   * @return array Keys: doc_ref, application_id, epo_publication_url,
   *               ipc_publication_url, title, abstract. doc_ref is '' and the
   *               other values are null when the field is not found.
   */
  function parse_result($input){
      if (!is_string($input)) {
          $input = '';
      }

      $output = [];

      // Document reference number (used for OPS API): the three
      // whitespace-separated tokens after "=D", joined without spaces
      preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref);
      $output['doc_ref'] = str_replace(' ', '', $doc_ref[1] ?? '');

      // Output key => pattern capturing the line after the field's label
      $labelled_fields = [
          'application_id'      => '/Application.*\n(.*)\n/',
          'epo_publication_url' => '/Publication.*\n(.*)\n/',
          'ipc_publication_url' => '/IPC.*\n(.*)\n/',
          'title'               => '/Title.*\n(.*)\n/',
      ];
      foreach ($labelled_fields as $key => $pattern) {
          // Only read the capture group when the pattern actually matched
          $output[$key] = preg_match($pattern, $input, $match) ? $match[1] : null;
      }

      // Abstract: prefer the labelled form; otherwise fall back to the text that
      // follows a "(x) " marker line and a blank line
      if (!preg_match('/Abstract.*\n(.*)\n/', $input, $abstract)) {
          preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract);
      }
      $output['abstract'] = $abstract[1] ?? null;

      return $output;
  }
  /**
   * Fetch the content of one randomly chosen record from the Solr core.
   *
   * The query matches every document (*:*) and sorts on a `random_<seed>` field
   * with a fresh seed per call, so the first row is a random record. The Solr
   * host, port and core are read from $_ENV (SOLR_HOSTNAME, SOLR_PORT, SOLR_CORE).
   *
   * @return string The record's `content` field, or '' when the request failed,
   *                the response could not be decoded, or no document came back.
   */
  function get_random_record(){
      // Random seed for Solr's random sort field
      $random = rand();

      // Only one row is needed: the random sort already shuffles the result set
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $_ENV["SOLR_CORE"] . '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' . $random . '%20asc&rows=1';

      // Perform the cURL request against the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() returns false when the transfer itself failed
      if ($response === false) {
          return '';
      }

      // Decode the API response and make sure at least one document came back
      $json = json_decode($response);
      if (empty($json->response->docs) || !is_array($json->response->docs)) {
          return '';
      }

      $docs = $json->response->docs;
      return isset($docs[0]->content) ? $docs[0]->content : '';
  }
  /**
   * Fetch one random record and return its parsed fields.
   *
   * @return array The parse_result() array for the content returned by
   *               get_random_record().
   */
  function one_random_record (){
      return parse_result(get_random_record());
  }
  /**
   * Collect the titles of ten randomly fetched records.
   *
   * Calls get_random_record() ten times, so the same record may appear more
   * than once.
   *
   * @return array Ten entries indexed 0-9; each is the line following the
   *               record's "Title" label, or null when no title was found.
   */
  function ten_random_titles (){
      $output = [];
      for ($x = 0; $x < 10; $x++) {
          $record = get_random_record();
          // Only read the capture group when the title pattern actually matched
          $output[$x] = preg_match('/Title.*\n(.*)\n/', $record, $title) ? $title[1] : null;
      }
      return $output;
  }
  91. ?>