A search interface for data from the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office API. https://patents.copim.ac.uk
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

solr.php 7.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. <?php
  2. function solr_search($search, $core, $sort){
  3. // URL encode the search term
  4. $search = urlencode($search);
  5. if ($sort == 'relevance'){
  6. // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
  7. $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json';
  8. }
  9. else{
  10. // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
  11. $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json&sort=' . urlencode($sort);
  12. }
  13. // Perform Curl request on the Solr API
  14. $ch = curl_init();
  15. curl_setopt($ch, CURLOPT_URL, $solrurl);
  16. curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
  17. curl_setopt($ch, CURLOPT_HEADER, FALSE);
  18. curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
  19. $response = curl_exec($ch);
  20. curl_close($ch);
  21. // Turn the API response into useful Json
  22. $json = json_decode($response);
  23. // If no results are found, display a message
  24. if ($json->response->numFound == '0'){
  25. $output = 'no results found';
  26. }
  27. else{
  28. foreach ($json->response->docs as $result){
  29. $id = $result->id;
  30. $content = $result->content;
  31. $result_output = parse_result($id, $content);
  32. $output[] = $result_output;
  33. }
  34. }
  35. return $output;
  36. }
  37. function solr_search_id($id, $core){
  38. // URL encode the ID string
  39. $id = urlencode($id);
  40. // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
  41. $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=id%3A"' . $id . '"&wt=json';
  42. // Perform Curl request on the Solr API
  43. $ch = curl_init();
  44. curl_setopt($ch, CURLOPT_URL, $solrurl);
  45. curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
  46. curl_setopt($ch, CURLOPT_HEADER, FALSE);
  47. curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
  48. $response = curl_exec($ch);
  49. curl_close($ch);
  50. // Turn the API response into useful Json
  51. $json = json_decode($response);
  52. // If no results are found, display a message
  53. if ($json->response->numFound == '0'){
  54. $output = 'no results found';
  55. }
  56. else{
  57. foreach ($json->response->docs as $result){
  58. $id = $result->id;
  59. $content = $result->content;
  60. $result_output = parse_result($id, $content);
  61. $output[] = $result_output;
  62. }
  63. }
  64. return $output;
  65. }
  66. function parse_result($id, $input){
  67. $output['id'] = $id;
  68. //Set document reference number (used for OPS API)
  69. if (preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref)){
  70. $output['doc_ref'] = str_replace(' ','',$doc_ref[1]);
  71. }
  72. elseif (preg_match('/=D&locale=en_EP\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref)){
  73. $output['doc_ref'] = str_replace(' ','',$doc_ref[1]);
  74. }
  75. // Search for the application ID in the content element and display it
  76. preg_match('/Application.*\n(.*)\n/', $input, $application_id);
  77. $output['application_id'] = $application_id[1];
  78. // Search for the EPO publication URL in the content element and display it
  79. preg_match('/Publication.*\n(.*)\n/', $input, $epo_publication);
  80. $output['epo_publication_url'] = $epo_publication[1];
  81. // Search for the IPC publication URL in the content element and display it
  82. preg_match('/IPC.*\n(.*)\n/', $input, $ipc_publication);
  83. $output['ipc_publication_url'] = $ipc_publication[1];
  84. // Search for the title in the content element and display it
  85. if (preg_match('/Title.*\n(.*)\n/', $input, $title)){
  86. $output['title'] = $title[1];
  87. }
  88. // Search for the abstract in the content element and display it
  89. if (preg_match('/Abstract.*\n(.*)\n/', $input, $abstract)){
  90. $output['abstract'] = $abstract[1];
  91. }
  92. elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) {
  93. $output['abstract'] = $abstract[1];
  94. }
  95. // Search for the year in the content element and display it
  96. if (preg_match('/=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})/', $input, $year)){
  97. $output['year'] = $year[1];
  98. }
  99. return $output;
  100. }
  101. function get_random_record($core){
  102. // Generate a random number for sorting by random
  103. $random = rand();
  104. // Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
  105. // This query retrieves only the bib identifier field for records which satisfy the search query
  106. $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' . $random . '%20asc';
  107. // Perform Curl request on the Solr API
  108. $ch = curl_init();
  109. curl_setopt($ch, CURLOPT_URL, $solrurl);
  110. curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
  111. curl_setopt($ch, CURLOPT_HEADER, FALSE);
  112. curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
  113. $response = curl_exec($ch);
  114. curl_close($ch);
  115. // Turn the API response into useful Json
  116. $json = json_decode($response);
  117. // Pick a random key out of the docs array
  118. $random = array_rand($json->response->docs);
  119. //Set ID variable
  120. $id = $json->response->docs[$random]->id;
  121. //Set content variable
  122. $content = $json->response->docs[$random]->content;
  123. //Construct associative array with ID and content
  124. $result_array = array($id=>$content);
  125. return $result_array;
  126. }
  127. function one_random_record ($core){
  128. $random = get_random_record($core);
  129. foreach ($random as $id => $content){
  130. $output = parse_result($id, $content);
  131. }
  132. return $output;
  133. }
  134. function two_random_records($core){
  135. $x = 0;
  136. while ($x <= 1) {
  137. $random = get_random_record($core);
  138. foreach($random as $id => $content){
  139. $output[$x] = parse_result($id, $content);
  140. ++$x;
  141. }
  142. }
  143. return $output;
  144. }
  145. function ten_random_titles ($core){
  146. $x = 0;
  147. while ($x <= 9) {
  148. $random = get_random_record($core);
  149. foreach($random as $id => $content){
  150. // Search for the title in the content element and display it
  151. if (preg_match('/Title.*\n(.*)\n/', $content, $title)){
  152. $output[$x] = array($id=>$title[1]);
  153. ++$x;
  154. }
  155. }
  156. }
  157. return $output;
  158. }
  159. function ten_random_abstracts ($core){
  160. $x = 0;
  161. while ($x <= 9) {
  162. $random = get_random_record($core);
  163. foreach($random as $id => $content){
  164. // Search for the abstract in the content element and display it
  165. if (preg_match('/Abstract.*\n(.*)\n/', $content, $abstract)){
  166. $output[$x] = array($id=>$abstract[1]);
  167. ++$x;
  168. }
  169. elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $content, $abstract)) {
  170. $output[$x] = array($id=>$abstract[1]);
  171. ++$x;
  172. }
  173. }
  174. }
  175. return $output;
  176. }
  177. function ten_random_doc_refs ($core){
  178. $x = 0;
  179. while ($x <= 9) {
  180. $random = get_random_record($core);
  181. foreach($random as $id => $content){
  182. //Set document reference number (used for OPS API)
  183. preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $content, $doc_ref);
  184. $doc_ref = str_replace(' ','',$doc_ref[1]);
  185. if (check_for_images($doc_ref)){
  186. $output[$x] = $doc_ref;
  187. ++$x;
  188. }
  189. }
  190. }
  191. return $output;
  192. }
  193. ?>