A search interface for data from the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office API. https://patents.copim.ac.uk
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

206 lines
6.4KB

  1. <?php
  /**
   * Run a full-text query against the `content` field of a Solr core.
   *
   * The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"]. Solr's query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $search Query text, inserted as-is after `content:` in the request URL.
   * @param string $core   Name of the Solr core to query.
   * @return array|string  List of parse_result() arrays (one per returned document),
   *                       or the string 'no results found' when Solr reports no hits
   *                       or the request/response could not be used.
   */
  function solr_search($search, $core){
      // NOTE(review): unlike solr_search_id(), $search is not URL-encoded here;
      // presumably callers encode it before calling - confirm before adding urlencode().
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json';

      // Perform the cURL request against the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() yields false on a transport failure and json_decode() yields
      // null on malformed JSON; neither may be dereferenced as an object.
      $json = ($response === false) ? null : json_decode($response);

      // Treat an unusable response, zero hits, or an empty docs list as "no results"
      if (!isset($json->response->numFound)
          || $json->response->numFound == '0'
          || empty($json->response->docs)) {
          return 'no results found';
      }

      // Parse every returned document into its structured form
      $output = array();
      foreach ($json->response->docs as $result){
          $output[] = parse_result($result->id, $result->content);
      }
      return $output;
  }
  /**
   * Look up documents in a Solr core by their `id` field.
   *
   * The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"]. Solr's query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $id   Document ID; it is URL-encoded here and sent as a quoted phrase.
   * @param string $core Name of the Solr core to query.
   * @return array|string List of parse_result() arrays (one per returned document),
   *                      or the string 'no results found' when Solr reports no hits
   *                      or the request/response could not be used.
   */
  function solr_search_id($id, $core){
      // URL encode the ID string
      $id = urlencode($id);

      // The surrounding double quotes are sent percent-encoded (%22) because a raw
      // '"' is not a legal character in a URL.
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=id%3A%22' . $id . '%22&wt=json';

      // Perform the cURL request against the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() yields false on a transport failure and json_decode() yields
      // null on malformed JSON; neither may be dereferenced as an object.
      $json = ($response === false) ? null : json_decode($response);

      // Treat an unusable response, zero hits, or an empty docs list as "no results"
      if (!isset($json->response->numFound)
          || $json->response->numFound == '0'
          || empty($json->response->docs)) {
          return 'no results found';
      }

      // Parse every returned document into its structured form
      $output = array();
      foreach ($json->response->docs as $result){
          $output[] = parse_result($result->id, $result->content);
      }
      return $output;
  }
  /**
   * Extract structured fields from the text content of one Solr document.
   *
   * Each field is located with a regular expression: a heading line
   * (e.g. "Title...") followed by the value on the next line.
   *
   * @param mixed  $id    Document ID, copied into the result unchanged.
   * @param string $input Document text to scan.
   * @return array Associative array that always contains 'id', 'application_id',
   *               'epo_publication_url' and 'ipc_publication_url' (the last three
   *               are null when their heading is not found), and contains
   *               'doc_ref', 'title' and 'abstract' only when they are found.
   */
  function parse_result($id, $input){
      // Normalise a missing content value so preg_match() always receives a string
      $input = (string) $input;

      $output = array('id' => $id);

      // Set document reference number (used for OPS API): the three
      // whitespace-separated tokens after "=D", joined without spaces
      if (preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref)){
          $output['doc_ref'] = str_replace(' ', '', $doc_ref[1]);
      }

      // Fields whose key is always present in the result; the value is the line
      // following the heading, or null when the heading does not occur.
      $always_present = array(
          'application_id'      => '/Application.*\n(.*)\n/',
          'epo_publication_url' => '/Publication.*\n(.*)\n/',
          'ipc_publication_url' => '/IPC.*\n(.*)\n/',
      );
      foreach ($always_present as $key => $pattern){
          $output[$key] = preg_match($pattern, $input, $found) ? $found[1] : null;
      }

      // Search for the title in the content element
      if (preg_match('/Title.*\n(.*)\n/', $input, $title)){
          $output['title'] = $title[1];
      }

      // Search for the abstract; if there is no "Abstract" heading, fall back to
      // the line that follows a parenthesised single character and a blank line
      if (preg_match('/Abstract.*\n(.*)\n/', $input, $abstract)){
          $output['abstract'] = $abstract[1];
      }
      elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) {
          $output['abstract'] = $abstract[1];
      }

      return $output;
  }
  /**
   * Fetch one randomly chosen document from a Solr core.
   *
   * The query matches all documents (*:*) and sorts on a `random_<n>` field with
   * a fresh random number, then one document is picked at random from the page
   * of results that Solr returns. The Solr host and port are read from
   * $_ENV["SOLR_HOSTNAME"] and $_ENV["SOLR_PORT"].
   *
   * @param string $core Name of the Solr core to query.
   * @return array Single-element associative array of document ID => content.
   * @throws RuntimeException When the request fails, the response is not valid
   *                          JSON, or Solr returns no documents.
   */
  function get_random_record($core){
      // Generate a random number for sorting by random
      $random = rand();

      // Assemble the query string to send to Solr. Solr's query syntax is described at
      // https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' . $random . '%20asc';

      // Perform the cURL request against the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() yields false on a transport failure and json_decode() yields
      // null on malformed JSON; neither may be dereferenced as an object.
      $json = ($response === false) ? null : json_decode($response);

      // array_rand() cannot pick from a missing or empty list, so fail with a
      // message that names the actual problem instead.
      if (!isset($json->response->docs)
          || !is_array($json->response->docs)
          || count($json->response->docs) === 0) {
          throw new RuntimeException('Solr returned no documents for core "' . $core . '"');
      }

      // Pick a random document out of the docs array
      $docs = $json->response->docs;
      $picked = $docs[array_rand($docs)];

      // Construct associative array with ID and content
      return array($picked->id => $picked->content);
  }
  /**
   * Fetch one random document from a Solr core and parse it.
   *
   * @param string $core Name of the Solr core to query.
   * @return array|null Result of parse_result() for the record returned by
   *                    get_random_record(), or null if it returned no record.
   */
  function one_random_record ($core){
      // Start from null so an empty record list cannot leave the result undefined
      $output = null;

      // get_random_record() returns an ID => content map; parse its entry
      foreach (get_random_record($core) as $id => $content){
          $output = parse_result($id, $content);
      }
      return $output;
  }
  /**
   * Collect the titles of ten randomly chosen documents.
   *
   * @param string $core Name of the Solr core passed on to get_random_record().
   *                     It is optional only so that existing zero-argument calls
   *                     keep working. NOTE(review): an empty core name produces a
   *                     Solr URL without a core, so callers should pass it.
   * @return array Ten entries indexed 0..9, each a one-element array of
   *               document ID => title (null when no "Title" heading is found).
   */
  function ten_random_titles ($core = ''){
      $output = array();
      for ($x = 0; $x <= 9; $x++) {
          // get_random_record() requires the core name
          $random = get_random_record($core);
          foreach ($random as $id => $content){
              // Search for the title in the content element: the line after the "Title" heading
              $found = preg_match('/Title.*\n(.*)\n/', (string) $content, $title);
              $output[$x] = array($id => ($found ? $title[1] : null));
          }
      }
      return $output;
  }
  /**
   * Collect the abstracts of ten randomly chosen documents.
   *
   * @param string $core Name of the Solr core passed on to get_random_record().
   *                     It is optional only so that existing zero-argument calls
   *                     keep working. NOTE(review): an empty core name produces a
   *                     Solr URL without a core, so callers should pass it.
   * @return array Ten entries indexed 0..9, each a one-element array of
   *               document ID => abstract (null when no abstract is found).
   */
  function ten_random_abstracts ($core = ''){
      $output = array();
      for ($x = 0; $x <= 9; $x++) {
          // get_random_record() requires the core name
          $random = get_random_record($core);
          foreach ($random as $id => $content){
              $content = (string) $content;
              // Prefer the line after an "Abstract" heading; otherwise fall back to
              // the line that follows a parenthesised single character and a blank line
              if (preg_match('/Abstract.*\n(.*)\n/', $content, $abstract)){
                  $output[$x] = array($id => $abstract[1]);
              }
              elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $content, $abstract)) {
                  $output[$x] = array($id => $abstract[1]);
              }
              else {
                  // Neither pattern matched: keep the slot, with no abstract text
                  $output[$x] = array($id => null);
              }
          }
      }
      return $output;
  }
  /**
   * Collect ten document reference numbers from random documents for which
   * check_for_images() returns a truthy value.
   *
   * @param string $core Name of the Solr core passed on to get_random_record().
   *                     It is optional only so that existing zero-argument calls
   *                     keep working. NOTE(review): an empty core name produces a
   *                     Solr URL without a core, so callers should pass it.
   * @return array Ten document reference strings indexed 0..9. The same
   *               reference can appear more than once, since records are drawn
   *               independently.
   */
  function ten_random_doc_refs ($core = ''){
      $output = array();
      $x = 0;
      // Keep drawing until ten references are accepted (the previous bound of 9
      // stopped one short). NOTE(review): this loop has no upper bound on
      // attempts; it relies on enough records passing check_for_images().
      while ($x < 10) {
          // get_random_record() requires the core name
          $random = get_random_record($core);
          foreach ($random as $id => $content){
              // Set document reference number (used for OPS API); a record
              // without one cannot be checked, so skip it
              if (!preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', (string) $content, $match)){
                  continue;
              }
              $doc_ref = str_replace(' ', '', $match[1]);
              // check_for_images() is defined elsewhere; only references it accepts are kept
              if (check_for_images($doc_ref)){
                  $output[$x] = $doc_ref;
                  ++$x;
              }
          }
      }
      return $output;
  }
  162. ?>