A search interface for data from the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office API. https://patents.copim.ac.uk
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

201 líneas
6.3KB

  1. <?php
  /**
   * Run a search against the `content` field of a Solr core and parse every hit.
   *
   * The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"]. Solr's query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $search Query text, inserted verbatim after `content:` in the
   *                       request URL. It is not encoded here.
   *                       NOTE(review): presumably callers URL-encode it first - confirm.
   * @param string $core   Name of the Solr core to query.
   *
   * @return array|string List of parse_result() arrays, one per document, or the
   *                      string 'no results found' when Solr reports no hits or
   *                      the request/response could not be used.
   */
  function solr_search($search, $core){
      // Assemble the select URL for the requested core.
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json';

      // Perform the cURL request on the Solr API.
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() returns false on failure; only decode a real response body.
      $json = is_string($response) ? json_decode($response) : null;

      // `??` tolerates a null $json or a payload without a `response` member
      // (for example a Solr error document) without raising warnings.
      $num_found = $json->response->numFound ?? 0;
      $docs = $json->response->docs ?? array();

      if ((int) $num_found === 0 || !is_array($docs) || $docs === array()) {
          return 'no results found';
      }

      $output = array();
      foreach ($docs as $result) {
          $output[] = parse_result($result->content);
      }
      return $output;
  }
  /**
   * Look up documents by their Solr `id` field and parse every hit.
   *
   * The Solr host, port and core are read from $_ENV["SOLR_HOSTNAME"],
   * $_ENV["SOLR_PORT"] and $_ENV["SOLR_CORE"]. Solr's query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $id Document identifier; it is URL-encoded here before use.
   *
   * @return array|string List of parse_result() arrays, one per document, or the
   *                      string 'No results found' when Solr reports no hits or
   *                      the request/response could not be used.
   */
  function solr_search_id($id){
      // URL encode the ID string.
      $id = urlencode($id);

      // Assemble the select URL; the ID is wrapped in double quotes in the query.
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $_ENV["SOLR_CORE"] . '/select?q.op=OR&q=id%3A"' . $id . '"&wt=json';

      // Perform the cURL request on the Solr API.
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() returns false on failure; only decode a real response body.
      $json = is_string($response) ? json_decode($response) : null;

      // `??` tolerates a null $json or a payload without a `response` member.
      $num_found = $json->response->numFound ?? 0;
      $docs = $json->response->docs ?? array();

      // Return the message directly: previously it was stored in one variable
      // while a different, never-assigned variable was returned.
      if ((int) $num_found === 0 || !is_array($docs) || $docs === array()) {
          return 'No results found';
      }

      $results = array();
      foreach ($docs as $result) {
          $results[] = parse_result($result->content);
      }
      return $results;
  }
  /**
   * Extract the known fields from the raw text of one record.
   *
   * Most fields are located by a label: the captured value is the whole line
   * that follows the first line containing the label text.
   *
   * @param string $input Raw `content` text of a record.
   *
   * @return array Associative array with the keys `doc_ref`, `application_id`,
   *               `epo_publication_url`, `ipc_publication_url`, `title` and
   *               `abstract`. `doc_ref` is an empty string when it cannot be
   *               found; every other field is null when it cannot be found.
   */
  function parse_result($input){
      $output = array();

      // Document reference number (used for OPS API): the three
      // whitespace-separated tokens after "=D", joined with the spaces removed.
      preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref);
      $output['doc_ref'] = str_replace(' ', '', $doc_ref[1] ?? '');

      // Application ID: line following the "Application" label line.
      preg_match('/Application.*\n(.*)\n/', $input, $application_id);
      $output['application_id'] = $application_id[1] ?? null;

      // EPO publication URL: line following the "Publication" label line.
      preg_match('/Publication.*\n(.*)\n/', $input, $epo_publication);
      $output['epo_publication_url'] = $epo_publication[1] ?? null;

      // IPC publication URL: line following the "IPC" label line.
      preg_match('/IPC.*\n(.*)\n/', $input, $ipc_publication);
      $output['ipc_publication_url'] = $ipc_publication[1] ?? null;

      // Title: line following the "Title" label line.
      preg_match('/Title.*\n(.*)\n/', $input, $title);
      $output['title'] = $title[1] ?? null;

      // Abstract: line following the "Abstract" label line. When there is no such
      // label, fall back to the line after a "(x) " marker and a blank line.
      if (!preg_match('/Abstract.*\n(.*)\n/', $input, $abstract)) {
          preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract);
      }
      $output['abstract'] = $abstract[1] ?? null;

      return $output;
  }
  /**
   * Fetch one randomly chosen record from Solr.
   *
   * The query matches all documents (`*:*`) and sorts them on a `random_<n>`
   * field built from a fresh rand() value; one document is then picked at
   * random from the returned page. The Solr host, port and core are read from
   * $_ENV["SOLR_HOSTNAME"], $_ENV["SOLR_PORT"] and $_ENV["SOLR_CORE"]. Solr's
   * query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @return array Single-entry associative array: document id => document content.
   *
   * @throws RuntimeException When the request fails or the response contains
   *                          no documents.
   */
  function get_random_record(){
      // Generate a random number for sorting by random.
      $random = rand();

      // Assemble the select URL with a random sort order.
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $_ENV["SOLR_CORE"] . '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' . $random . '%20asc';

      // Perform the cURL request on the Solr API.
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() returns false on failure; only decode a real response body.
      $json = is_string($response) ? json_decode($response) : null;
      $docs = $json->response->docs ?? array();

      // array_rand() cannot pick from an empty or non-array value, so fail with
      // a clear message instead of an engine-level error.
      if (!is_array($docs) || $docs === array()) {
          throw new RuntimeException('Solr returned no documents for the random record query');
      }

      // Pick a random document out of the docs array.
      $doc = $docs[array_rand($docs)];

      // Construct the associative array with ID and content.
      return array($doc->id => $doc->content);
  }
  /**
   * Fetch a random record and return its parsed fields.
   *
   * @return array|null The parse_result() array for the record returned by
   *                    get_random_record(); null if that array is empty.
   */
  function one_random_record (){
      $parsed = null;
      // The record array is keyed by ID; only the content is needed here.
      foreach (get_random_record() as $body) {
          $parsed = parse_result($body);
      }
      return $parsed;
  }
  /**
   * Collect the titles of ten randomly fetched records.
   *
   * Each record comes from a separate get_random_record() call, so the same
   * record may appear more than once.
   *
   * @return array List indexed 0..9; each entry is a single-entry array mapping
   *               the record ID to its title, or to null when no title line
   *               could be found in the record content.
   */
  function ten_random_titles (){
      $output = array();
      for ($x = 0; $x <= 9; $x++) {
          foreach (get_random_record() as $id => $content) {
              // Title: line following the "Title" label line in the content.
              preg_match('/Title.*\n(.*)\n/', $content, $title);
              // Guard the capture so a record without a title does not raise an
              // undefined-offset warning.
              $output[$x] = array($id => $title[1] ?? null);
          }
      }
      return $output;
  }
  /**
   * Collect the abstracts of ten randomly fetched records.
   *
   * Each record comes from a separate get_random_record() call, so the same
   * record may appear more than once.
   *
   * @return array List indexed 0..9; each entry is a single-entry array mapping
   *               the record ID to its abstract, or to null when no abstract
   *               could be found in the record content.
   */
  function ten_random_abstracts (){
      $output = array();
      for ($x = 0; $x <= 9; $x++) {
          foreach (get_random_record() as $id => $content) {
              // Abstract: line following the "Abstract" label line. When there is
              // no such label, fall back to the line after a "(x) " marker and a
              // blank line.
              if (!preg_match('/Abstract.*\n(.*)\n/', $content, $abstract)) {
                  preg_match('/\(.\) \\n\\n(.*)\\n/', $content, $abstract);
              }
              // Guard the capture so a record matching neither pattern does not
              // raise an undefined-offset warning.
              $output[$x] = array($id => $abstract[1] ?? null);
          }
      }
      return $output;
  }
  /**
   * Collect ten document reference numbers from randomly fetched records.
   *
   * A reference is kept only when check_for_images() returns a truthy value
   * for it, so more than ten records may be fetched. The same reference may
   * appear more than once.
   * NOTE(review): check_for_images() is defined outside this section; it
   * presumably reports whether images are available for the reference - confirm.
   * NOTE(review): the loop has no upper bound on attempts; if no record ever
   * passes check_for_images() it will not terminate.
   *
   * @return array List indexed 0..9 of document reference strings.
   */
  function ten_random_doc_refs (){
      $output = array();
      $x = 0;
      // Loop until ten references are collected. The previous bound of 9
      // stopped after nine, unlike the sibling ten_random_* functions.
      while ($x < 10) {
          foreach (get_random_record() as $id => $content) {
              // Document reference number (used for OPS API): the three
              // whitespace-separated tokens after "=D", with the spaces removed.
              preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $content, $matches);
              $doc_ref = str_replace(' ', '', $matches[1] ?? '');
              if (check_for_images($doc_ref)) {
                  $output[$x] = $doc_ref;
                  ++$x;
              }
          }
      }
      return $output;
  }
  158. ?>