Ver código fonte

finished faceting for country and year

solr_update
Simon Bowie 2 anos atrás
pai
commit
8a435fa6c2
7 arquivos alterados com 177 adições e 146 exclusões
  1. +2
    -1
      web/app/data.py
  2. +4
    -2
      web/app/random.py
  3. +52
    -64
      web/app/search.py
  4. +60
    -64
      web/app/solr.py
  5. +3
    -3
      web/app/templates/data.html
  6. +1
    -1
      web/app/templates/index.html
  7. +55
    -11
      web/app/templates/search.html

+ 2
- 1
web/app/data.py Ver arquivo

@@ -4,7 +4,8 @@
# @author: Simon Bowie <ad7588@coventry.ac.uk>
# @purpose: data route for data
# @acknowledgements:
# pycountry module for country data
# pycountry module for country data: https://pypi.org/project/pycountry/
# data formatted for Chart.js: https://www.chartjs.org/docs/latest/

from flask import Blueprint, render_template, request
import random

+ 4
- 2
web/app/random.py Ver arquivo

@@ -15,7 +15,8 @@ random = Blueprint('random', __name__)
@random.route('/random/')
def random_record():
core = 'all'
results = solr.get_random_record(core)
search_results = solr.random_search(core)
results = search_results[0]
for result in results:
publication_details = ops.get_publication_details(result['doc_ref'])
result.update(publication_details)
@@ -31,7 +32,8 @@ def two_random_records():
results_list = []
i = 0
while i <= 1:
results = solr.get_random_record(core)
search_results = solr.random_search(core)
results = search_results[0]
for result in results:
publication_details = ops.get_publication_details(result['doc_ref'])
result.update(publication_details)

+ 52
- 64
web/app/search.py Ver arquivo

@@ -9,14 +9,18 @@
from flask import Blueprint, render_template, request, redirect, url_for
from . import solr
from . import ops
import pycountry

search = Blueprint('search', __name__)

# route for search page
# route for basic search page
@search.route('/search/', methods=['GET', 'POST'])
def basic_search():
if request.method == 'POST':
search = request.form.get('search')
if request.form.get('query') is not None:
query = request.form.get('query')
else:
query = None
if request.form.get('core') is not None:
core = request.form.get('core')
else:
@@ -25,15 +29,54 @@ def basic_search():
sort = request.form.get('sort')
else:
sort = 'relevance'
search_results = solr.content_search(core, sort, search)
if request.form.get('country') is not None:
country = request.form.get('country')
else:
country = None
if request.form.get('year') is not None:
year = request.form.get('year')
else:
year = None
else:
if request.args.get('query') is not None:
query = request.args.get('query')
else:
query = None
if request.args.get('core') is not None:
core = request.args.get('core')
else:
core = 'all'
if request.args.get('sort') is not None:
sort = request.args.get('sort')
else:
sort = 'relevance'
if request.args.get('country') is not None:
country = request.args.get('country')
else:
country = None
if request.args.get('year') is not None:
year = request.args.get('year')
else:
year = None
if (query is None and country is None and year is None):
return redirect(url_for('main.index'))
else:
search_results = solr.query_search(core, sort, query, country, year)
results = search_results[0]
num_found = search_results[1]
country_facet = search_results[2]
year_facet = search_results[3]
year_facet = search_results[2]['year']
country_facet = search_results[2]['country']
for i in range(0, len(country_facet)):
if i % 2 == 0:
country_full = pycountry.countries.get(alpha_2=country_facet[i])
if country_full is not None:
country_facet[i] = country_full
else:
country_full = pycountry.historic_countries.get(alpha_2=country_facet[i])
if country_full is not None:
country_facet[i] = country_full
total_number = solr.get_total_number(core)
return render_template('search.html', results=results, num_found=num_found, total_number=total_number, country_facet=country_facet, year_facet=year_facet, search=search, core=core, sort=sort)
else:
return redirect(url_for('main.index'))
return render_template('search.html', results=results, num_found=num_found, total_number=total_number, country_facet=country_facet, year_facet=year_facet, query=query, core=core, sort=sort, country=country, year=year)

# route for id_search page
@search.route('/search/id/', methods=['GET'])
@@ -46,13 +89,8 @@ def id_search():
core = request.args.get('core')
else:
core = 'all'
if request.args.get('sort') is not None:
sort = request.args.get('sort')
else:
sort = 'relevance'
search_results = solr.content_search(core, sort, search, id)
search_results = solr.id_search(core, id)
results = search_results[0]

for result in results:
publication_details = ops.get_publication_details(result['doc_ref'])
result.update(publication_details)
@@ -61,53 +99,3 @@ def id_search():
result.update(image)

return render_template('record.html', results=results)

# route for country search page
@search.route('/search/country/', methods=['GET', 'POST'])
def country_search():
if request.method == 'POST':
country_code = request.form.get('country_code')
core = request.form.get('core')
sort = request.form.get('sort')
else:
country_code = request.args.get('country_code')
core = request.args.get('core')
sort = request.args.get('sort')
if country_code is None:
return redirect(url_for('main.index'))
if core is None:
core = 'all'
if sort is None:
sort = 'relevance'
field = 'country'
search_results = solr.term_search(core, sort, field, country_code)
results = search_results[0]
num_found = search_results[1]
total_number = solr.get_total_number(core)

return render_template('search.html', results=results, num_found=num_found, total_number=total_number, country_code=country_code, core=core, sort=sort)

# route for year search page
@search.route('/search/year/', methods=['GET', 'POST'])
def year_search():
if request.method == 'POST':
year = request.form.get('year')
core = request.form.get('core')
sort = request.form.get('sort')
else:
year = request.args.get('year')
core = request.args.get('core')
sort = request.args.get('sort')
if year is None:
return redirect(url_for('main.index'))
if core is None:
core = 'all'
if sort is None:
sort = 'relevance'
field = 'year'
search_results = solr.term_search(core, sort, field, year)
results = search_results[0]
num_found = search_results[1]
total_number = solr.get_total_number(core)

return render_template('search.html', results=results, num_found=num_found, total_number=total_number, year=year, core=core, sort=sort)

+ 60
- 64
web/app/solr.py Ver arquivo

@@ -4,6 +4,7 @@
# @author: Simon Bowie <simon.bowie.19@gmail.com>
# @purpose: Performs Solr functions
# @acknowledgements:
# pycountry module for country data: https://pypi.org/project/pycountry/

import os
import requests
@@ -17,23 +18,14 @@ from . import ops
solr_hostname = os.environ.get('SOLR_HOSTNAME')
solr_port = os.environ.get('SOLR_PORT')

def content_search(core, sort, search=None, id=None):

# Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
if id is not None:
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=id%3A"' + id + '"&wt=json'
else:
if (sort == 'relevance'):
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=content%3A' + urllib.parse.quote_plus(search) + '&wt=json&facet.field=country&facet.field=year&facet.sort=count&facet=true'
else:
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=content%3A' + urllib.parse.quote_plus(search) + '&wt=json&sort=' + sort + '&facet.field=country&facet.field=year&facet.sort=count&facet=true'

def solr_search(solrurl):
# get result
request = requests.get(solrurl)
# turn the API response into useful Json
json = request.json()

num_found = json['response']['numFound']
facets = []

if (num_found == 0):
output = 'no results found'
@@ -47,39 +39,65 @@ def content_search(core, sort, search=None, id=None):
# parse result
result_output = parse_result(id, content)
output.append(result_output)
country_facet = json['facet_counts']['facet_fields']['country']
year_facet = json['facet_counts']['facet_fields']['year']
return output, num_found, country_facet, year_facet
try:
json['facet_counts']
facets = json['facet_counts']['facet_fields']
except KeyError:
pass

def term_search(core, sort, field, input):
return output, num_found, facets

# Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
def query_search(core, sort, query, country, year):

# assemble parameters for the query string to Solr
if (sort == 'relevance'):
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=%7B!term%20f%3D' + field + '%7D' + input + '&wt=json'
sort_parameter = ''
else:
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=%7B!term%20f%3D' + field + '%7D' + input + '&wt=json&sort=' + sort
sort_parameter = '&sort=' + sort

# get result
request = requests.get(solrurl)
# turn the API response into useful Json
json = request.json()
if (query is None or query == 'None'):
query_parameter = '&q=*%3A*'
else:
query_parameter = '&q=content%3A' + urllib.parse.quote_plus(query)

num_found = json['response']['numFound']
if (country is None or country == 'None'):
country_parameter = ''
else:
field = 'country'
country_parameter = '&fq=%7B!term%20f%3D' + field + '%7D' + country

if (num_found == 0):
output = 'no results found'
if (year is None or year == 'None'):
year_parameter = ''
else:
output = []
for result in json['response']['docs']:
# set ID variable
id = result['id']
# set content variable
content = result['content']
# parse result
result_output = parse_result(id, content)
output.append(result_output)
field = 'year'
year_parameter = '&fq=%7B!term%20f%3D' + field + '%7D' + year

# assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&indent=true' + query_parameter + '&wt=json' + sort_parameter + country_parameter + year_parameter + '&facet.field=country&facet.field=year&facet.sort=count&facet.mincount=1&facet=true'

output = solr_search(solrurl)

return output

def id_search(core, id):

# assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=id%3A"' + id + '"&wt=json'

output = solr_search(solrurl)

return output, num_found
return output

def random_search(core):
    """Ask Solr for a single record from ``core`` using a randomised sort.

    A fresh random integer is used to name the ``random_<n>`` sort field,
    and the request is limited to one row. The assembled URL is handed to
    ``solr_search`` and whatever that returns is passed straight back.
    """
    # the number becomes part of the random_* sort field name in the query
    seed = str(random.randint(0, 9999999))

    # assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
    url_parts = (
        'http://',
        solr_hostname,
        ':',
        solr_port,
        '/solr/',
        core,
        '/select?q.op=OR&q=*%3A*&wt=json&sort=random_',
        seed,
        '%20asc&rows=1',
    )
    solrurl = ''.join(url_parts)

    return solr_search(solrurl)

def parse_result(id, input):

@@ -108,7 +126,9 @@ def parse_result(id, input):

# search for the IPC publication URL in the content element and display it
ipc_publication = re.search('IPC.*\n(.*)\n', input)
output['ipc_publication_url'] = ipc_publication.group(1)
if ipc_publication is not None:
if ipc_publication.group(1) is not None:
output['ipc_publication_url'] = ipc_publication.group(1)

# search for the title in the content element and display it
title = re.search('Title.*?\\n(.*?)\\n|Tile.?\\n(.*?)\\n', input)
@@ -150,38 +170,13 @@ def parse_result(id, input):

return output

def get_random_record(core):

rand = str(random.randint(0, 9999999))

# Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' + rand + '%20asc&rows=1'

# get result
request = requests.get(solrurl)
# turn the API response into useful Json
json = request.json()

if (json['response']['numFound'] == 0):
output = 'no results found'
else:
output = []
for result in json['response']['docs']:
# set ID variables
id = result['id']
# set content variable
content = result['content']
# parse result
result_output = parse_result(id, content)
output.append(result_output)
return output

def get_ten_random_elements(field):
core = 'all'
output = []
i = 0
while i <= 9:
results = get_random_record(core)
search_results = random_search(core)
results = search_results[0]
for result in results:
if field in result:
dict = {'id': result['id'], field: result[field]}
@@ -194,7 +189,8 @@ def get_ten_random_images():
output = []
i = 0
while i <= 9:
results = get_random_record(core)
search_results = random_search(core)
results = search_results[0]
for result in results:
if ops.get_images(result['doc_ref']):
image = ops.get_images(result['doc_ref'])

+ 3
- 3
web/app/templates/data.html Ver arquivo

@@ -14,7 +14,7 @@ There are a total of {{ total_number }} patents.
{% if i % 2 %}
{{ year_data[i] }}<br>
{% else %}
<a href="{{ url_for('search.year_search', year=year_data[i]) }}">
<a href="{{ url_for('search.basic_search', year=year_data[i]) }}">
{{ year_data[i] }}
</a>:
{% endif %}
@@ -26,12 +26,12 @@ There are a total of {{ total_number }} patents.
{{ country_data[i] }}<br>
{% else %}
{% if country_data[i].name is defined %}
<a href="{{ url_for('search.country_search', country_code=country_data[i].alpha_2) }}">
<a href="{{ url_for('search.basic_search', country=country_data[i].alpha_2) }}">
{{ country_data[i].name }}
</a>
{{ country_data[i].flag }}:
{% else %}
<a href="{{ url_for('search.country_search', country_code=country_data[i]) }}">
<a href="{{ url_for('search.basic_search', country=country_data[i]) }}">
{{ country_data[i] }}
</a>:
{% endif %}

+ 1
- 1
web/app/templates/index.html Ver arquivo

@@ -41,7 +41,7 @@
<div class="row justify-content-center mt-3">
<div class="col-sm-10 text-center button-search">
<form action="{{ url_for('search.basic_search') }}" method="POST">
<input type="text" name="search" id="inputsearch" placeholder="search for a patent record...">
<input type="text" name="query" id="inputsearch" placeholder="search for a patent record...">
<input type="submit" id="submit" value="search">
</form>
</div>

+ 55
- 11
web/app/templates/search.html Ver arquivo

@@ -11,17 +11,11 @@
</div>

<div class="row p-3">
{% if search is defined %}
<form action="{{ url_for('search.basic_search') }}" method="POST">
<input type="hidden" name="search" value="{{ search }}">
{% elif country_code is defined %}
<form action="{{ url_for('search.country_search') }}" method="POST">
<input type="hidden" name="country_code" value="{{ country_code }}">
{% elif year is defined %}
<form action="{{ url_for('search.year_search') }}" method="POST">
<input type="hidden" name="year" value="{{ year }}">
{% endif %}
<input type="hidden" name="query" value="{{ query }}">
<input type="hidden" name="searchopt" value="{{ core }}">
<input type="hidden" name="country" value="{{ country }}">
<input type="hidden" name="year" value="{{ year }}">
sort by:
<select name="sort" id="sort" onchange="this.form.submit()">
<option value="relevance" {% if sort == 'relevance' %} selected {% endif %}>relevance</option>
@@ -34,9 +28,59 @@
</form>
</div>

{{country_facet}}
<div class="row p-3">
  {# Country filter: re-posts the current query, core, sort and year to the basic search route with the chosen country. #}
  <form action="{{ url_for('search.basic_search') }}" method="POST">
    <input type="hidden" name="query" value="{{ query }}">
    <input type="hidden" name="searchopt" value="{{ core }}">
    <input type="hidden" name="sort" value="{{ sort }}">
    <input type="hidden" name="year" value="{{ year }}">
    filter by country:
    <select name="country" id="sort" onchange="this.form.submit()">
      {% if country is defined %}
        {# the literal value "None" is submitted when the filter is cleared #}
        <option value="None" selected>none</option>
      {% else %}
        <option>country</option>
      {% endif %}
      {# country_facet is walked as a flat list: even indexes hold the country, the following odd index holds the number shown in parentheses #}
      {% for i in range(0, country_facet|length) %}
        {% if i % 2 == 0 %}
          {% if country_facet[i].name is defined %}
            {# entry is an object exposing .name and .alpha_2 #}
            <option value="{{ country_facet[i].alpha_2 }}" {% if country == country_facet[i].alpha_2 %} selected {% endif %}>{{ country_facet[i].name }} ({{ country_facet[i+1] }})</option>
          {% else %}
            {# entry has no .name attribute, so it is used as-is for both value and label #}
            <option value="{{ country_facet[i] }}" {% if country == country_facet[i] %} selected {% endif %}>{{ country_facet[i] }} ({{ country_facet[i+1] }})</option>
          {% endif %}
        {% endif %}
      {% endfor %}
    </select>
    {# fallback submit button for browsers without JavaScript, since the select submits via onchange #}
    <noscript>
      <input type="submit" class="btn btn-default" value="Set" />
    </noscript>
  </form>
</div>

{{year_facet}}
<div class="row p-3">
  {# Year filter: re-posts the current query, core, sort and country to the basic search route with the chosen year. #}
  <form action="{{ url_for('search.basic_search') }}" method="POST">
    <input type="hidden" name="query" value="{{ query }}">
    <input type="hidden" name="searchopt" value="{{ core }}">
    <input type="hidden" name="sort" value="{{ sort }}">
    <input type="hidden" name="country" value="{{ country }}">
    filter by year:
    <select name="year" id="sort" onchange="this.form.submit()">
      {% if year is defined %}
        {# the literal value "None" is submitted when the filter is cleared #}
        <option value="None" selected>none</option>
      {% else %}
        <option>year</option>
      {% endif %}
      {# year_facet is walked as a flat list: even indexes hold the year, the following odd index holds the number shown in parentheses #}
      {% for i in range(0, year_facet|length) %}
        {% if i % 2 == 0 %}
          <option value="{{ year_facet[i] }}" {% if year == year_facet[i] %} selected {% endif %}>{{ year_facet[i] }} ({{ year_facet[i+1] }})</option>
        {% endif %}
      {% endfor %}
    </select>
    {# fallback submit button for browsers without JavaScript, since the select submits via onchange #}
    <noscript>
      <input type="submit" class="btn btn-default" value="Set" />
    </noscript>
  </form>
</div>

{% if results == 'no results found' %}


Carregando…
Cancelar
Salvar