A search interface for data from the Politics of Patents case study (part of Copim WP6): this parses data from the archive of RTF files and provides additional data from the European Patent Office API. https://patents.copim.ac.uk
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

224 lines
7.2KB

  1. <?php
  /**
   * Search the `content` field of a Solr core and parse every hit.
   *
   * The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"]. Solr's query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $search Query text; it is placed into the request URL as-is.
   *                       NOTE(review): it is not URL-encoded here - presumably
   *                       the caller encodes it; confirm before changing.
   * @param string $core   Name of the Solr core to query.
   * @param string $sort   'relevance' to send no sort parameter, otherwise a
   *                       sort clause that is URL-encoded and sent as `sort`.
   * @return array|string  A list of parse_result() arrays, or the string
   *                       'no results found' when Solr reports zero hits or
   *                       when the request / response could not be used.
   */
  function solr_search($search, $core, $sort){
      // Both orderings share the same base query; only the sort parameter differs.
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=content%3A' . $search . '&wt=json';
      if ($sort != 'relevance'){
          $solrurl .= '&sort=' . urlencode($sort);
      }

      // Perform the cURL request on the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      $curl_error = ($response === false) ? curl_error($ch) : '';
      curl_close($ch);

      // A transport failure leaves nothing to decode: log it and report no results
      // instead of dereferencing a non-object further down.
      if ($response === false){
          error_log('solr_search: Solr request failed: ' . $curl_error);
          return 'no results found';
      }

      // Turn the API response into an object tree
      $json = json_decode($response);
      if (!isset($json->response->numFound) || !isset($json->response->docs)){
          error_log('solr_search: unexpected response from Solr');
          return 'no results found';
      }

      // If no results are found, return a message instead of a list
      if ($json->response->numFound == '0'){
          return 'no results found';
      }

      // Start from an empty list so the return value is always defined
      $output = array();
      foreach ($json->response->docs as $result){
          $output[] = parse_result($result->id, $result->content);
      }
      return $output;
  }
  /**
   * Look up documents in a Solr core by their `id` field and parse every hit.
   *
   * The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"]. Solr's query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $id   Document ID; it is URL-encoded here and sent as a quoted phrase.
   * @param string $core Name of the Solr core to query.
   * @return array|string A list of parse_result() arrays, or the string
   *                      'no results found' when Solr reports zero hits or
   *                      when the request / response could not be used.
   */
  function solr_search_id($id, $core){
      // URL encode the ID string and wrap it in quotes so it is matched as one phrase
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=id%3A"' . urlencode($id) . '"&wt=json';

      // Perform the cURL request on the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      $curl_error = ($response === false) ? curl_error($ch) : '';
      curl_close($ch);

      // A transport failure leaves nothing to decode: log it and report no results
      // instead of dereferencing a non-object further down.
      if ($response === false){
          error_log('solr_search_id: Solr request failed: ' . $curl_error);
          return 'no results found';
      }

      // Turn the API response into an object tree
      $json = json_decode($response);
      if (!isset($json->response->numFound) || !isset($json->response->docs)){
          error_log('solr_search_id: unexpected response from Solr');
          return 'no results found';
      }

      // If no results are found, return a message instead of a list
      if ($json->response->numFound == '0'){
          return 'no results found';
      }

      // Start from an empty list so the return value is always defined
      $output = array();
      foreach ($json->response->docs as $result){
          $output[] = parse_result($result->id, $result->content);
      }
      return $output;
  }
  /**
   * Extract structured fields from the raw text content of one indexed document.
   *
   * Each field is located with a regular expression. The keys 'id',
   * 'application_id', 'epo_publication_url' and 'ipc_publication_url' are always
   * present (the last three are null when their heading is not found); the keys
   * 'doc_ref', 'title', 'abstract' and 'year' are only set when a match is found.
   *
   * @param mixed  $id    Identifier stored unchanged under 'id'.
   * @param string $input Text content of the document.
   * @return array Associative array of the extracted fields.
   */
  function parse_result($id, $input){
      $output = array('id' => $id);

      // Set document reference number (used for OPS API): the three
      // whitespace-separated tokens after "=D" (or after "=D&locale=en_EP"),
      // joined without spaces.
      if (preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref)
          || preg_match('/=D&locale=en_EP\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $input, $doc_ref)){
          $output['doc_ref'] = str_replace(' ','',$doc_ref[1]);
      }

      // For each of these headings, take the line that follows the heading line.
      // A missing heading yields null rather than reading an undefined match index.
      $output['application_id'] = preg_match('/Application.*\n(.*)\n/', $input, $application_id)
          ? $application_id[1] : null;
      $output['epo_publication_url'] = preg_match('/Publication.*\n(.*)\n/', $input, $epo_publication)
          ? $epo_publication[1] : null;
      $output['ipc_publication_url'] = preg_match('/IPC.*\n(.*)\n/', $input, $ipc_publication)
          ? $ipc_publication[1] : null;

      // Title: the line following the "Title" heading line
      if (preg_match('/Title.*\n(.*)\n/', $input, $title)){
          $output['title'] = $title[1];
      }

      // Abstract: the line following the "Abstract" heading line, otherwise the
      // line that follows a "(x) " marker and a blank line
      if (preg_match('/Abstract.*\n(.*)\n/', $input, $abstract)){
          $output['abstract'] = $abstract[1];
      }
      elseif (preg_match('/\(.\) \\n\\n(.*)\\n/', $input, $abstract)) {
          $output['abstract'] = $abstract[1];
      }

      // Year: the first four digits of the token that follows the document reference
      if (preg_match('/=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})/', $input, $year)){
          $output['year'] = $year[1];
      }
      return $output;
  }
  /**
   * Fetch one randomly chosen document from a Solr core.
   *
   * The query matches all documents (*:*) and sorts on a `random_<n>` field with
   * a fresh random number; one entry is then picked at random from the returned
   * docs. The Solr host and port are read from $_ENV["SOLR_HOSTNAME"] and
   * $_ENV["SOLR_PORT"]. Solr's query syntax is described at
   * https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
   *
   * @param string $core Name of the Solr core to query.
   * @return array Single-element associative array: document ID => document content.
   * @throws RuntimeException When the request fails or the response contains no documents.
   */
  function get_random_record($core){
      // Generate a random number for sorting by random
      $seed = rand();
      $solrurl = 'http://' . $_ENV["SOLR_HOSTNAME"] . ':' . $_ENV["SOLR_PORT"] . '/solr/' . $core . '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' . $seed . '%20asc';

      // Perform the cURL request on the Solr API
      $ch = curl_init();
      curl_setopt($ch, CURLOPT_URL, $solrurl);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
      curl_setopt($ch, CURLOPT_HEADER, FALSE);
      curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
      $response = curl_exec($ch);
      $curl_error = ($response === false) ? curl_error($ch) : '';
      curl_close($ch);

      if ($response === false){
          throw new RuntimeException('get_random_record: Solr request failed: ' . $curl_error);
      }

      // Turn the API response into an object tree
      $json = json_decode($response);

      // array_rand() cannot pick from a missing or empty list, so fail with a clear message
      if (!isset($json->response->docs) || !is_array($json->response->docs) || count($json->response->docs) === 0){
          throw new RuntimeException('get_random_record: Solr returned no documents for core ' . $core);
      }

      // Pick a random entry out of the docs array
      $docs = $json->response->docs;
      $doc = $docs[array_rand($docs)];

      // Construct associative array with ID and content
      $id = $doc->id;
      $content = $doc->content;
      return array($id=>$content);
  }
  /**
   * Fetch a random record from the given core and return its parsed fields.
   *
   * @param string $core Name of the Solr core to query.
   * @return array The parse_result() output for the record returned by get_random_record().
   */
  function one_random_record ($core){
      // get_random_record() hands back an ID => content map; parse each entry it contains
      foreach (get_random_record($core) as $record_id => $record_body){
          $parsed = parse_result($record_id, $record_body);
      }
      return $parsed;
  }
  /**
   * Collect the titles of ten randomly chosen records.
   *
   * Records are fetched one at a time with get_random_record(); a record only
   * counts when its content has a "Title" heading followed by a line of text.
   *
   * @param string $core         Name of the Solr core to query.
   * @param int    $max_attempts Upper bound on the number of records fetched, so the
   *                             loop ends even if too few records carry a title.
   * @return array List of up to ten single-element arrays (record ID => title).
   */
  function ten_random_titles ($core, $max_attempts = 100){
      $output = array();
      $x = 0;
      $attempts = 0;
      while ($x <= 9 && $attempts < $max_attempts) {
          ++$attempts;
          $random = get_random_record($core);
          foreach($random as $id => $content){
              // Search for the title in the content element and keep it
              if (preg_match('/Title.*\n(.*)\n/', $content, $title)){
                  $output[$x] = array($id=>$title[1]);
                  ++$x;
              }
          }
      }
      return $output;
  }
  /**
   * Collect the abstracts of ten randomly chosen records.
   *
   * Records are fetched one at a time with get_random_record(); a record only
   * counts when an abstract can be found in its content, either on the line after
   * an "Abstract" heading line or on the line after a "(x) " marker and a blank line.
   *
   * @param string $core         Name of the Solr core to query.
   * @param int    $max_attempts Upper bound on the number of records fetched, so the
   *                             loop ends even if too few records carry an abstract.
   * @return array List of up to ten single-element arrays (record ID => abstract).
   */
  function ten_random_abstracts ($core, $max_attempts = 100){
      $output = array();
      $x = 0;
      $attempts = 0;
      while ($x <= 9 && $attempts < $max_attempts) {
          ++$attempts;
          $random = get_random_record($core);
          foreach($random as $id => $content){
              // Search for the abstract in the content element and keep it
              if (preg_match('/Abstract.*\n(.*)\n/', $content, $abstract)
                  || preg_match('/\(.\) \\n\\n(.*)\\n/', $content, $abstract)){
                  $output[$x] = array($id=>$abstract[1]);
                  ++$x;
              }
          }
      }
      return $output;
  }
  /**
   * Collect ten document reference numbers from randomly chosen records for which
   * check_for_images() returns a truthy value.
   *
   * Records are fetched one at a time with get_random_record(). The reference is
   * the three whitespace-separated tokens after "=D" (or after "=D&locale=en_EP",
   * the same variant parse_result() accepts), joined without spaces. Records with
   * no reference in their content are skipped.
   *
   * @param string $core         Name of the Solr core to query.
   * @param int    $max_attempts Upper bound on the number of records fetched, so the
   *                             loop ends even if too few records qualify.
   * @return array List of up to ten document reference strings.
   */
  function ten_random_doc_refs ($core, $max_attempts = 100){
      $output = array();
      $x = 0;
      $attempts = 0;
      while ($x <= 9 && $attempts < $max_attempts) {
          ++$attempts;
          $random = get_random_record($core);
          foreach($random as $id => $content){
              // Set document reference number (used for OPS API); skip records without one
              if (!preg_match('/=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $content, $match)
                  && !preg_match('/=D&locale=en_EP\s(([^\s]*)\s([^\s]*)\s([^\s]*))/', $content, $match)){
                  continue;
              }
              $doc_ref = str_replace(' ','',$match[1]);
              if (check_for_images($doc_ref)){
                  $output[$x] = $doc_ref;
                  ++$x;
              }
          }
      }
      return $output;
  }
  180. ?>