A search interface for data from the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office API. https://patents.copim.ac.uk
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

205 lines
6.3KB

  1. <?php
  2. function solr_search($search, $core){
  3. // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
  4. $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json';
  5. // Perform Curl request on the Solr API
  6. $ch = curl_init();
  7. curl_setopt($ch, CURLOPT_URL, $solrurl);
  8. curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
  9. curl_setopt($ch, CURLOPT_HEADER, FALSE);
  10. curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
  11. $response = curl_exec($ch);
  12. curl_close($ch);
  13. // Turn the API response into useful Json
  14. $json = json_decode($response);
  15. // If no results are found, display a message
  16. if ($json->response->numFound == '0'){
  17. $output = 'no results found';
  18. }
  19. else{
  20. foreach ($json->response->docs as $result){
  21. $id = $result->id;
  22. $content = $result->content;
  23. $result_output = parse_result($id, $content);
  24. $output[] = $result_output;
  25. }
  26. }
  27. return $output;
  28. }
  29. function solr_search_id($id, $core){
  30. // URL encode the ID string
  31. $id = urlencode($id);
  32. // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
  33. $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=id%3A"' . $id . '"&wt=json';
  34. // Perform Curl request on the Solr API
  35. $ch = curl_init();
  36. curl_setopt($ch, CURLOPT_URL, $solrurl);
  37. curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
  38. curl_setopt($ch, CURLOPT_HEADER, FALSE);
  39. curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
  40. $response = curl_exec($ch);
  41. curl_close($ch);
  42. // Turn the API response into useful Json
  43. $json = json_decode($response);
  44. // If no results are found, display a message
  45. if ($json->response->numFound == '0'){
  46. $output = 'no results found';
  47. }
  48. else{
  49. foreach ($json->response->docs as $result){
  50. $id = $result->id;
  51. $content = $result->content;
  52. $result_output = parse_result($id, $content);
  53. $output[] = $result_output;
  54. }
  55. }
  56. return $output;
  57. }
  58. function parse_result($id, $input){
  59. $output['id'] = $id;
  60. //Set document reference number (used for OPS API)
  61. preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref);
  62. $output['doc_ref'] = str_replace(' ','',$doc_ref[1]);
  63. // Search for the application ID in the content element and display it
  64. preg_match('/Application.*\n(.*)\n/', $input, $application_id);
  65. $output['application_id'] = $application_id[1];
  66. // Search for the EPO publication URL in the content element and display it
  67. preg_match('/Publication.*\n(.*)\n/', $input, $epo_publication);
  68. $output['epo_publication_url'] = $epo_publication[1];
  69. // Search for the IPC publication URL in the content element and display it
  70. preg_match('/IPC.*\n(.*)\n/', $input, $ipc_publication);
  71. $output['ipc_publication_url'] = $ipc_publication[1];
  72. // Search for the title in the content element and display it
  73. preg_match('/Title.*\n(.*)\n/', $input, $title);
  74. $output['title'] = $title[1];
  75. // Search for the abstract in the content element and display it
  76. if (preg_match('/Abstract.*\n(.*)\n/', $input, $abstract)){
  77. $output['abstract'] = $abstract[1];
  78. }
  79. else {
  80. preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract);
  81. $output['abstract'] = $abstract[1];
  82. }
  83. return $output;
  84. }
  85. function get_random_record(){
  86. // Generate a random number for sorting by random
  87. $random = rand();
  88. // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
  89. // This query retrieves only the bib identifier field for records which satisfy the search query
  90. $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $_ENV["SOLR_CORE"] . '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' . $random . '%20asc';
  91. // Perform Curl request on the Solr API
  92. $ch = curl_init();
  93. curl_setopt($ch, CURLOPT_URL, $solrurl);
  94. curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
  95. curl_setopt($ch, CURLOPT_HEADER, FALSE);
  96. curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
  97. $response = curl_exec($ch);
  98. curl_close($ch);
  99. // Turn the API response into useful Json
  100. $json = json_decode($response);
  101. // Pick a random key out of the docs array
  102. $random = array_rand($json->response->docs);
  103. //Set ID variable
  104. $id = $json->response->docs[$random]->id;
  105. //Set content variable
  106. $content = $json->response->docs[$random]->content;
  107. //Construct associative array with ID and content
  108. $result_array = array($id=>$content);
  109. return $result_array;
  110. }
  111. function one_random_record (){
  112. $random = get_random_record();
  113. foreach ($random as $id => $content){
  114. $output = parse_result($content);
  115. }
  116. return $output;
  117. }
  118. function ten_random_titles (){
  119. for ($x=0; $x <= 9; $x++) {
  120. $random = get_random_record();
  121. foreach($random as $id => $content){
  122. // Search for the title in the content element and display it
  123. preg_match('/Title.*\n(.*)\n/', $content, $title);
  124. $output[$x] = array($id=>$title[1]);
  125. }
  126. }
  127. return $output;
  128. }
  129. function ten_random_abstracts (){
  130. for ($x=0; $x <= 9; $x++) {
  131. $random = get_random_record();
  132. foreach($random as $id => $content){
  133. // Search for the abstract in the content element and display it
  134. if (preg_match('/Abstract.*\n(.*)\n/', $content, $abstract)){
  135. $output[$x] = array($id=>$abstract[1]);
  136. }
  137. else {
  138. preg_match('/\(.\) \\n\\n(.*)\\n/', $content, $abstract);
  139. $output[$x] = array($id=>$abstract[1]);
  140. }
  141. }
  142. }
  143. return $output;
  144. }
  145. function ten_random_doc_refs (){
  146. $x = 0;
  147. while ($x < 9) {
  148. $random = get_random_record();
  149. foreach($random as $id => $content){
  150. //Set document reference number (used for OPS API)
  151. preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $content, $doc_ref);
  152. $doc_ref = str_replace(' ','',$doc_ref[1]);
  153. if (check_for_images($doc_ref)){
  154. $output[$x] = $doc_ref;
  155. ++$x;
  156. }
  157. }
  158. }
  159. return $output;
  160. }
  161. ?>