A search interface for data from the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office API. https://patents.copim.ac.uk
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

216 line
7.0KB

  1. <?php
  /**
   * Search the `content` field of a Solr core and parse every returned document.
   *
   * The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"] (config.env). Solr's query syntax can be found at many
   * sites including
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $search Query text, inserted into the request URL as-is.
   * @param string $core   Name of the Solr core to query.
   * @param string $sort   'relevance' to leave the ordering to Solr; any other
   *                       value is sent as a field name to sort on, ascending.
   *
   * @return array|string A list with one parse_result() array per returned
   *                      document, or the string 'no results found' when Solr
   *                      reports zero hits or the response cannot be read
   *                      (request failure, malformed JSON).
   */
  function solr_search($search, $core, $sort){
      // NOTE(review): $search and $sort are concatenated into the URL without
      // encoding; presumably the caller URL-encodes them - confirm before
      // passing raw user input.
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json';
      if ($sort != 'relevance'){
          $solrurl .= '&sort=' . $sort . '%20asc';
      }

      // Perform Curl request on the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() gives false when the request fails and json_decode() gives
      // null for malformed JSON; both cases are reported like an empty result
      // instead of reading properties of a non-object.
      $json = is_string($response) ? json_decode($response) : null;
      if (!isset($json->response->numFound) || $json->response->numFound == 0){
          return 'no results found';
      }

      // Always return an array from here on, even if Solr sent no docs.
      $output = array();
      $docs = isset($json->response->docs) ? $json->response->docs : array();
      foreach ($docs as $result){
          // A document without a content field is parsed as empty text.
          $content = isset($result->content) ? $result->content : '';
          $output[] = parse_result($result->id, $content);
      }
      return $output;
  }
  /**
   * Look up documents in a Solr core by their `id` field and parse them.
   *
   * The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"] (config.env). Solr's query syntax can be found at many
   * sites including
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $id   Document identifier; it is URL-encoded here and sent
   *                     as a quoted phrase.
   * @param string $core Name of the Solr core to query.
   *
   * @return array|string A list with one parse_result() array per returned
   *                      document, or the string 'no results found' when Solr
   *                      reports zero hits or the response cannot be read
   *                      (request failure, malformed JSON).
   */
  function solr_search_id($id, $core){
      // URL encode the ID string
      $encoded_id = urlencode($id);
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=id%3A"' . $encoded_id . '"&wt=json';

      // Perform Curl request on the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() gives false when the request fails and json_decode() gives
      // null for malformed JSON; both cases are reported like an empty result
      // instead of reading properties of a non-object.
      $json = is_string($response) ? json_decode($response) : null;
      if (!isset($json->response->numFound) || $json->response->numFound == 0){
          return 'no results found';
      }

      // Always return an array from here on, even if Solr sent no docs.
      $output = array();
      $docs = isset($json->response->docs) ? $json->response->docs : array();
      foreach ($docs as $result){
          // A document without a content field is parsed as empty text.
          $content = isset($result->content) ? $result->content : '';
          $output[] = parse_result($result->id, $content);
      }
      return $output;
  }
  /**
   * Extract the known fields from the text content of one indexed document.
   *
   * Each field is located with a regular expression. Most take the line that
   * follows a heading line (a line containing "Application", "Publication",
   * "IPC", "Title" or "Abstract").
   *
   * @param mixed       $id    Document identifier, copied to the 'id' key.
   * @param string|null $input Text content of the document.
   *
   * @return array Always has 'id', 'application_id', 'epo_publication_url' and
   *               'ipc_publication_url' (the last three are null when their
   *               heading is not found). 'doc_ref', 'title', 'abstract' and
   *               'year' are present only when they are found.
   */
  function parse_result($id, $input){
      // Treat a missing content field as empty text.
      if ($input === null){
          $input = '';
      }

      $output = array('id' => $id);

      //Set document reference number (used for OPS API): the three
      //whitespace-separated tokens after "=D", joined without spaces
      if (preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref)){
          $output['doc_ref'] = str_replace(' ','',$doc_ref[1]);
      }

      // The next three keys are always set so existing callers can read them
      // unconditionally; they are null when the heading is absent.
      $output['application_id'] = preg_match('/Application.*\n(.*)\n/', $input, $application_id)
          ? $application_id[1]
          : null;
      $output['epo_publication_url'] = preg_match('/Publication.*\n(.*)\n/', $input, $epo_publication)
          ? $epo_publication[1]
          : null;
      $output['ipc_publication_url'] = preg_match('/IPC.*\n(.*)\n/', $input, $ipc_publication)
          ? $ipc_publication[1]
          : null;

      // Search for the title in the content element
      if (preg_match('/Title.*\n(.*)\n/', $input, $title)){
          $output['title'] = $title[1];
      }

      // Search for the abstract in the content element; when there is no
      // "Abstract" heading, fall back to the line that follows a
      // parenthesised single character, a space and a blank line.
      if (preg_match('/Abstract.*\n(.*)\n/', $input, $abstract)){
          $output['abstract'] = $abstract[1];
      }
      elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) {
          $output['abstract'] = $abstract[1];
      }

      // Search for the year: the first four digits of the token that follows
      // the document reference
      if (preg_match('/=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})/', $input, $year)){
          $output['year'] = $year[1];
      }

      return $output;
  }
  /**
   * Fetch one randomly chosen document from a Solr core.
   *
   * The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"] (config.env). Solr's query syntax can be found at many
   * sites including
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $core Name of the Solr core to query.
   *
   * @return array Single-entry associative array: document id => content.
   *
   * @throws RuntimeException When the request fails, the response is not
   *                          valid JSON, or Solr returns no document.
   */
  function get_random_record($core){
      // Generate a random number for sorting by random
      $random = rand();
      // Match every document, order on a random_<number> field and ask for a
      // single row, since only one document is used.
      // NOTE(review): presumably random_* is a random-sort dynamic field in
      // the core's schema - confirm.
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=*%3A*&wt=json&rows=1&sort=random_' . $random . '%20asc';

      // Perform Curl request on the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      curl_close($ch);

      // curl_exec() gives false when the request fails and json_decode() gives
      // null for malformed JSON; fail with a clear message instead of
      // indexing into a missing or empty docs list.
      $json = is_string($response) ? json_decode($response) : null;
      if (empty($json->response->docs) || !is_array($json->response->docs)){
          throw new RuntimeException('Solr returned no documents for core "' . $core . '"');
      }

      // The random sort already shuffled the result, so take the first row
      $doc = $json->response->docs[0];
      $content = isset($doc->content) ? $doc->content : '';

      //Construct associative array with ID and content
      return array($doc->id => $content);
  }
  /**
   * Fetch one random document from a Solr core and parse it.
   *
   * @param string $core Name of the Solr core to query.
   *
   * @return array|null The parse_result() array for the record, or null if
   *                    get_random_record() returned no entry.
   */
  function one_random_record ($core){
      // Defined up front so an empty record yields null, not an undefined variable
      $output = null;
      // get_random_record() returns an id => content array
      foreach (get_random_record($core) as $id => $content){
          $output = parse_result($id, $content);
      }
      return $output;
  }
  /**
   * Fetch ten random documents from a Solr core and collect their titles.
   *
   * @param string $core Name of the Solr core to query.
   *
   * @return array Entries of the form array(id => title), keyed by the draw
   *               number (0-9). A draw whose document has no "Title" heading
   *               is skipped, so keys can have gaps and the array can be empty.
   */
  function ten_random_titles ($core){
      // Defined up front so the function returns an array even without a match
      $output = array();
      for ($x = 0; $x <= 9; $x++) {
          foreach (get_random_record($core) as $id => $content){
              // The title is the line that follows the "Title" heading line
              if (preg_match('/Title.*\n(.*)\n/', $content, $title)){
                  $output[$x] = array($id=>$title[1]);
              }
          }
      }
      return $output;
  }
  /**
   * Fetch ten random documents from a Solr core and collect their abstracts.
   *
   * @param string $core Name of the Solr core to query.
   *
   * @return array Entries of the form array(id => abstract), keyed by the draw
   *               number (0-9). A draw whose document has no recognisable
   *               abstract is skipped, so keys can have gaps and the array can
   *               be empty.
   */
  function ten_random_abstracts ($core){
      // Defined up front so the function returns an array even without a match
      $output = array();
      for ($x = 0; $x <= 9; $x++) {
          foreach (get_random_record($core) as $id => $content){
              // The abstract is the line that follows the "Abstract" heading
              // line; when there is no such heading, fall back to the line
              // after a parenthesised single character, a space and a blank line.
              if (preg_match('/Abstract.*\n(.*)\n/', $content, $abstract)){
                  $output[$x] = array($id=>$abstract[1]);
              }
              elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $content, $abstract)) {
                  $output[$x] = array($id=>$abstract[1]);
              }
          }
      }
      return $output;
  }
  /**
   * Collect ten document reference numbers of random records for which
   * check_for_images() returns a truthy value.
   *
   * Random records are drawn until ten references have been accepted or
   * $max_attempts draws have been made, whichever comes first.
   *
   * @param string $core         Name of the Solr core to query.
   * @param int    $max_attempts Upper bound on the number of random draws, so
   *                             the loop ends even if few or no records
   *                             qualify.
   *
   * @return array List (keys 0..n-1) of up to ten document reference strings.
   */
  function ten_random_doc_refs ($core, $max_attempts = 200){
      $wanted = 10;
      $output = array();
      for ($attempt = 0; count($output) < $wanted && $attempt < $max_attempts; $attempt++) {
          foreach (get_random_record($core) as $content){
              //Set document reference number (used for OPS API): the three
              //whitespace-separated tokens after "=D", joined without spaces
              if (!preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $content, $doc_ref)){
                  // No reference in this record; draw another one
                  continue;
              }
              $doc_ref = str_replace(' ','',$doc_ref[1]);
              if (count($output) < $wanted && check_for_images($doc_ref)){
                  $output[] = $doc_ref;
              }
          }
      }
      return $output;
  }
  172. ?>