Browse Source

finished faceting for country and year

solr_update
Simon Bowie 2 years ago
parent
commit
8a435fa6c2
7 changed files with 177 additions and 146 deletions
  1. +2
    -1
      web/app/data.py
  2. +4
    -2
      web/app/random.py
  3. +52
    -64
      web/app/search.py
  4. +60
    -64
      web/app/solr.py
  5. +3
    -3
      web/app/templates/data.html
  6. +1
    -1
      web/app/templates/index.html
  7. +55
    -11
      web/app/templates/search.html

+ 2
- 1
web/app/data.py View File

# @author: Simon Bowie <ad7588@coventry.ac.uk> # @author: Simon Bowie <ad7588@coventry.ac.uk>
# @purpose: data route for data # @purpose: data route for data
# @acknowledgements: # @acknowledgements:
# pycountry module for country data
# pycountry module for country data: https://pypi.org/project/pycountry/
# data formatted for Chart.js: https://www.chartjs.org/docs/latest/


from flask import Blueprint, render_template, request from flask import Blueprint, render_template, request
import random import random

+ 4
- 2
web/app/random.py View File

@random.route('/random/') @random.route('/random/')
def random_record(): def random_record():
core = 'all' core = 'all'
results = solr.get_random_record(core)
search_results = solr.random_search(core)
results = search_results[0]
for result in results: for result in results:
publication_details = ops.get_publication_details(result['doc_ref']) publication_details = ops.get_publication_details(result['doc_ref'])
result.update(publication_details) result.update(publication_details)
results_list = [] results_list = []
i = 0 i = 0
while i <= 1: while i <= 1:
results = solr.get_random_record(core)
search_results = solr.random_search(core)
results = search_results[0]
for result in results: for result in results:
publication_details = ops.get_publication_details(result['doc_ref']) publication_details = ops.get_publication_details(result['doc_ref'])
result.update(publication_details) result.update(publication_details)

+ 52
- 64
web/app/search.py View File

from flask import Blueprint, render_template, request, redirect, url_for from flask import Blueprint, render_template, request, redirect, url_for
from . import solr from . import solr
from . import ops from . import ops
import pycountry


search = Blueprint('search', __name__) search = Blueprint('search', __name__)


# route for search page
# route for basic search page
@search.route('/search/', methods=['GET', 'POST']) @search.route('/search/', methods=['GET', 'POST'])
def basic_search(): def basic_search():
if request.method == 'POST': if request.method == 'POST':
search = request.form.get('search')
if request.form.get('query') is not None:
query = request.form.get('query')
else:
query = None
if request.form.get('core') is not None: if request.form.get('core') is not None:
core = request.form.get('core') core = request.form.get('core')
else: else:
sort = request.form.get('sort') sort = request.form.get('sort')
else: else:
sort = 'relevance' sort = 'relevance'
search_results = solr.content_search(core, sort, search)
if request.form.get('country') is not None:
country = request.form.get('country')
else:
country = None
if request.form.get('year') is not None:
year = request.form.get('year')
else:
year = None
else:
if request.args.get('query') is not None:
query = request.args.get('query')
else:
query = None
if request.args.get('core') is not None:
core = request.args.get('core')
else:
core = 'all'
if request.args.get('sort') is not None:
sort = request.args.get('sort')
else:
sort = 'relevance'
if request.args.get('country') is not None:
country = request.args.get('country')
else:
country = None
if request.args.get('year') is not None:
year = request.args.get('year')
else:
year = None
if (query is None and country is None and year is None):
return redirect(url_for('main.index'))
else:
search_results = solr.query_search(core, sort, query, country, year)
results = search_results[0] results = search_results[0]
num_found = search_results[1] num_found = search_results[1]
country_facet = search_results[2]
year_facet = search_results[3]
year_facet = search_results[2]['year']
country_facet = search_results[2]['country']
for i in range(0, len(country_facet)):
if i % 2 == 0:
country_full = pycountry.countries.get(alpha_2=country_facet[i])
if country_full is not None:
country_facet[i] = country_full
else:
country_full = pycountry.historic_countries.get(alpha_2=country_facet[i])
if country_full is not None:
country_facet[i] = country_full
total_number = solr.get_total_number(core) total_number = solr.get_total_number(core)
return render_template('search.html', results=results, num_found=num_found, total_number=total_number, country_facet=country_facet, year_facet=year_facet, search=search, core=core, sort=sort)
else:
return redirect(url_for('main.index'))
return render_template('search.html', results=results, num_found=num_found, total_number=total_number, country_facet=country_facet, year_facet=year_facet, query=query, core=core, sort=sort, country=country, year=year)


# route for id_search page # route for id_search page
@search.route('/search/id/', methods=['GET']) @search.route('/search/id/', methods=['GET'])
core = request.args.get('core') core = request.args.get('core')
else: else:
core = 'all' core = 'all'
if request.args.get('sort') is not None:
sort = request.args.get('sort')
else:
sort = 'relevance'
search_results = solr.content_search(core, sort, search, id)
search_results = solr.id_search(core, id)
results = search_results[0] results = search_results[0]

for result in results: for result in results:
publication_details = ops.get_publication_details(result['doc_ref']) publication_details = ops.get_publication_details(result['doc_ref'])
result.update(publication_details) result.update(publication_details)
result.update(image) result.update(image)


return render_template('record.html', results=results) return render_template('record.html', results=results)

# route for country search page
@search.route('/search/country/', methods=['GET', 'POST'])
def country_search():
    """Run a term search on the ``country`` field and render ``search.html``.

    ``country_code``, ``core`` and ``sort`` are read from the form body on
    POST and from the query string otherwise. A missing ``country_code``
    redirects to ``main.index``; a missing ``core`` becomes ``'all'`` and a
    missing ``sort`` becomes ``'relevance'``.
    """
    # POST carries the parameters in the form, any other method in the URL
    params = request.form if request.method == 'POST' else request.args
    country_code = params.get('country_code')
    core = params.get('core')
    sort = params.get('sort')

    # nothing to search for: send the visitor back to the index page
    if country_code is None:
        return redirect(url_for('main.index'))

    core = 'all' if core is None else core
    sort = 'relevance' if sort is None else sort

    search_results = solr.term_search(core, sort, 'country', country_code)
    results = search_results[0]
    num_found = search_results[1]
    total_number = solr.get_total_number(core)

    return render_template(
        'search.html',
        results=results,
        num_found=num_found,
        total_number=total_number,
        country_code=country_code,
        core=core,
        sort=sort,
    )

# route for year search page
@search.route('/search/year/', methods=['GET', 'POST'])
def year_search():
    """Run a term search on the ``year`` field and render ``search.html``.

    ``year``, ``core`` and ``sort`` are read from the form body on POST and
    from the query string otherwise. A missing ``year`` redirects to
    ``main.index``; a missing ``core`` becomes ``'all'`` and a missing
    ``sort`` becomes ``'relevance'``.
    """
    # POST carries the parameters in the form, any other method in the URL
    params = request.form if request.method == 'POST' else request.args
    year = params.get('year')
    core = params.get('core')
    sort = params.get('sort')

    # nothing to search for: send the visitor back to the index page
    if year is None:
        return redirect(url_for('main.index'))

    core = 'all' if core is None else core
    sort = 'relevance' if sort is None else sort

    search_results = solr.term_search(core, sort, 'year', year)
    results = search_results[0]
    num_found = search_results[1]
    total_number = solr.get_total_number(core)

    return render_template(
        'search.html',
        results=results,
        num_found=num_found,
        total_number=total_number,
        year=year,
        core=core,
        sort=sort,
    )

+ 60
- 64
web/app/solr.py View File

# @author: Simon Bowie <simon.bowie.19@gmail.com> # @author: Simon Bowie <simon.bowie.19@gmail.com>
# @purpose: Performs Solr functions # @purpose: Performs Solr functions
# @acknowledgements: # @acknowledgements:
# pycountry module for country data: https://pypi.org/project/pycountry/


import os import os
import requests import requests
solr_hostname = os.environ.get('SOLR_HOSTNAME') solr_hostname = os.environ.get('SOLR_HOSTNAME')
solr_port = os.environ.get('SOLR_PORT') solr_port = os.environ.get('SOLR_PORT')


def content_search(core, sort, search=None, id=None):

# Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
if id is not None:
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=id%3A"' + id + '"&wt=json'
else:
if (sort == 'relevance'):
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=content%3A' + urllib.parse.quote_plus(search) + '&wt=json&facet.field=country&facet.field=year&facet.sort=count&facet=true'
else:
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=content%3A' + urllib.parse.quote_plus(search) + '&wt=json&sort=' + sort + '&facet.field=country&facet.field=year&facet.sort=count&facet=true'

def solr_search(solrurl):
# get result # get result
request = requests.get(solrurl) request = requests.get(solrurl)
# turn the API response into useful Json # turn the API response into useful Json
json = request.json() json = request.json()


num_found = json['response']['numFound'] num_found = json['response']['numFound']
facets = []


if (num_found == 0): if (num_found == 0):
output = 'no results found' output = 'no results found'
# parse result # parse result
result_output = parse_result(id, content) result_output = parse_result(id, content)
output.append(result_output) output.append(result_output)
country_facet = json['facet_counts']['facet_fields']['country']
year_facet = json['facet_counts']['facet_fields']['year']
return output, num_found, country_facet, year_facet
try:
json['facet_counts']
facets = json['facet_counts']['facet_fields']
except KeyError:
pass


def term_search(core, sort, field, input):
return output, num_found, facets


# Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
def query_search(core, sort, query, country, year):

# assemble parameters for the query string to Solr
if (sort == 'relevance'): if (sort == 'relevance'):
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=%7B!term%20f%3D' + field + '%7D' + input + '&wt=json'
sort_parameter = ''
else: else:
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=%7B!term%20f%3D' + field + '%7D' + input + '&wt=json&sort=' + sort
sort_parameter = '&sort=' + sort


# get result
request = requests.get(solrurl)
# turn the API response into useful Json
json = request.json()
if (query is None or query == 'None'):
query_parameter = '&q=*%3A*'
else:
query_parameter = '&q=content%3A' + urllib.parse.quote_plus(query)


num_found = json['response']['numFound']
if (country is None or country == 'None'):
country_parameter = ''
else:
field = 'country'
country_parameter = '&fq=%7B!term%20f%3D' + field + '%7D' + country


if (num_found == 0):
output = 'no results found'
if (year is None or year == 'None'):
year_parameter = ''
else: else:
output = []
for result in json['response']['docs']:
# set ID variable
id = result['id']
# set content variable
content = result['content']
# parse result
result_output = parse_result(id, content)
output.append(result_output)
field = 'year'
year_parameter = '&fq=%7B!term%20f%3D' + field + '%7D' + year

# assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&indent=true' + query_parameter + '&wt=json' + sort_parameter + country_parameter + year_parameter + '&facet.field=country&facet.field=year&facet.sort=count&facet.mincount=1&facet=true'

output = solr_search(solrurl)

return output

def id_search(core, id):

# assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=id%3A"' + id + '"&wt=json'

output = solr_search(solrurl)


return output, num_found
return output

def random_search(core):
    """Query a Solr core for a single record in random order.

    Draws a fresh integer for the ``random_<n>`` sort field on every call,
    requests one row matching ``*:*``, and returns whatever ``solr_search``
    returns for that URL.
    """
    seed = str(random.randint(0, 9999999))

    # assemble a query string to send to Solr. This uses the Solr hostname
    # from config.env. Solr's query syntax can be found at many sites including
    # https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
    solrurl = (
        'http://' + solr_hostname + ':' + solr_port + '/solr/' + core
        + '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' + seed
        + '%20asc&rows=1'
    )

    return solr_search(solrurl)


def parse_result(id, input): def parse_result(id, input):




# search for the IPC publication URL in the content element and display it # search for the IPC publication URL in the content element and display it
ipc_publication = re.search('IPC.*\n(.*)\n', input) ipc_publication = re.search('IPC.*\n(.*)\n', input)
output['ipc_publication_url'] = ipc_publication.group(1)
if ipc_publication is not None:
if ipc_publication.group(1) is not None:
output['ipc_publication_url'] = ipc_publication.group(1)


# search for the title in the content element and display it # search for the title in the content element and display it
title = re.search('Title.*?\\n(.*?)\\n|Tile.?\\n(.*?)\\n', input) title = re.search('Title.*?\\n(.*?)\\n|Tile.?\\n(.*?)\\n', input)


return output return output


def get_random_record(core):
    """Fetch one record in random order from a Solr core and parse it.

    Returns the string ``'no results found'`` when Solr reports
    ``numFound == 0``; otherwise a list holding the ``parse_result`` output
    for every document in the response.
    """
    seed = str(random.randint(0, 9999999))

    # Assemble a query string to send to Solr. This uses the Solr hostname
    # from config.env. Solr's query syntax can be found at many sites including
    # https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
    solrurl = (
        'http://' + solr_hostname + ':' + solr_port + '/solr/' + core
        + '/select?q.op=OR&q=*%3A*&wt=json&sort=random_' + seed
        + '%20asc&rows=1'
    )

    # fetch the result and decode the JSON body of the API response
    response = requests.get(solrurl)
    payload = response.json()

    if payload['response']['numFound'] == 0:
        return 'no results found'

    # hand each document's ID and content to the parser
    return [
        parse_result(doc['id'], doc['content'])
        for doc in payload['response']['docs']
    ]

def get_ten_random_elements(field): def get_ten_random_elements(field):
core = 'all' core = 'all'
output = [] output = []
i = 0 i = 0
while i <= 9: while i <= 9:
results = get_random_record(core)
search_results = random_search(core)
results = search_results[0]
for result in results: for result in results:
if field in result: if field in result:
dict = {'id': result['id'], field: result[field]} dict = {'id': result['id'], field: result[field]}
output = [] output = []
i = 0 i = 0
while i <= 9: while i <= 9:
results = get_random_record(core)
search_results = random_search(core)
results = search_results[0]
for result in results: for result in results:
if ops.get_images(result['doc_ref']): if ops.get_images(result['doc_ref']):
image = ops.get_images(result['doc_ref']) image = ops.get_images(result['doc_ref'])

+ 3
- 3
web/app/templates/data.html View File

{% if i % 2 %} {% if i % 2 %}
{{ year_data[i] }}<br> {{ year_data[i] }}<br>
{% else %} {% else %}
<a href="{{ url_for('search.year_search', year=year_data[i]) }}">
<a href="{{ url_for('search.basic_search', year=year_data[i]) }}">
{{ year_data[i] }} {{ year_data[i] }}
</a>: </a>:
{% endif %} {% endif %}
{{ country_data[i] }}<br> {{ country_data[i] }}<br>
{% else %} {% else %}
{% if country_data[i].name is defined %} {% if country_data[i].name is defined %}
<a href="{{ url_for('search.country_search', country_code=country_data[i].alpha_2) }}">
<a href="{{ url_for('search.basic_search', country=country_data[i].alpha_2) }}">
{{ country_data[i].name }} {{ country_data[i].name }}
</a> </a>
{{ country_data[i].flag }}: {{ country_data[i].flag }}:
{% else %} {% else %}
<a href="{{ url_for('search.country_search', country_code=country_data[i]) }}">
<a href="{{ url_for('search.basic_search', country=country_data[i]) }}">
{{ country_data[i] }} {{ country_data[i] }}
</a>: </a>:
{% endif %} {% endif %}

+ 1
- 1
web/app/templates/index.html View File

<div class="row justify-content-center mt-3"> <div class="row justify-content-center mt-3">
<div class="col-sm-10 text-center button-search"> <div class="col-sm-10 text-center button-search">
<form action="{{ url_for('search.basic_search') }}" method="POST"> <form action="{{ url_for('search.basic_search') }}" method="POST">
<input type="text" name="search" id="inputsearch" placeholder="search for a patent record...">
<input type="text" name="query" id="inputsearch" placeholder="search for a patent record...">
<input type="submit" id="submit" value="search"> <input type="submit" id="submit" value="search">
</form> </form>
</div> </div>

+ 55
- 11
web/app/templates/search.html View File

</div> </div>


<div class="row p-3"> <div class="row p-3">
{% if search is defined %}
<form action="{{ url_for('search.basic_search') }}" method="POST"> <form action="{{ url_for('search.basic_search') }}" method="POST">
<input type="hidden" name="search" value="{{ search }}">
{% elif country_code is defined %}
<form action="{{ url_for('search.country_search') }}" method="POST">
<input type="hidden" name="country_code" value="{{ country_code }}">
{% elif year is defined %}
<form action="{{ url_for('search.year_search') }}" method="POST">
<input type="hidden" name="year" value="{{ year }}">
{% endif %}
<input type="hidden" name="query" value="{{ query }}">
<input type="hidden" name="searchopt" value="{{ core }}"> <input type="hidden" name="searchopt" value="{{ core }}">
<input type="hidden" name="country" value="{{ country }}">
<input type="hidden" name="year" value="{{ year }}">
sort by: sort by:
<select name="sort" id="sort" onchange="this.form.submit()"> <select name="sort" id="sort" onchange="this.form.submit()">
<option value="relevance" {% if sort == 'relevance' %} selected {% endif %}>relevance</option> <option value="relevance" {% if sort == 'relevance' %} selected {% endif %}>relevance</option>
</form> </form>
</div> </div>


{{country_facet}}
<div class="row p-3">
<form action="{{ url_for('search.basic_search') }}" method="POST">
<input type="hidden" name="query" value="{{ query }}">
<input type="hidden" name="searchopt" value="{{ core }}">
<input type="hidden" name="sort" value="{{ sort }}">
<input type="hidden" name="year" value="{{ year }}">
filter by country:
<select name="country" id="sort" onchange="this.form.submit()">
{% if country is defined %}
<option value="None" selected>none</option>
{% else %}
<option>country</option>
{% endif %}
{% for i in range(0, country_facet|length) %}
{% if i % 2 == 0 %}
{% if country_facet[i].name is defined %}
<option value="{{ country_facet[i].alpha_2 }}" {% if country == country_facet[i].alpha_2 %} selected {% endif %}">{{ country_facet[i].name }} ({{ country_facet[i+1] }})</option>
{% else %}
<option value="{{ country_facet[i] }}" {% if country == country_facet[i] %} selected {% endif %}">{{ country_facet[i] }} ({{ country_facet[i+1] }})</option>
{% endif %}
{% endif %}
{% endfor %}
</select>
<noscript>
<input type="submit" class="btn btn-default" value="Set" />
</noscript>
</form>
</div>


{{year_facet}}
<div class="row p-3">
<form action="{{ url_for('search.basic_search') }}" method="POST">
<input type="hidden" name="query" value="{{ query }}">
<input type="hidden" name="searchopt" value="{{ core }}">
<input type="hidden" name="sort" value="{{ sort }}">
<input type="hidden" name="country" value="{{ country }}">
filter by year:
<select name="year" id="sort" onchange="this.form.submit()">
{% if year is defined %}
<option value="None" selected>none</option>
{% else %}
<option>year</option>
{% endif %}
{% for i in range(0, year_facet|length) %}
{% if i % 2 == 0 %}
<option value="{{ year_facet[i] }}" {% if year == year_facet[i] %} selected {% endif %}">{{ year_facet[i] }} ({{ year_facet[i+1] }})</option>
{% endif %}
{% endfor %}
</select>
<noscript>
<input type="submit" class="btn btn-default" value="Set" />
</noscript>
</form>
</div>


{% if results == 'no results found' %} {% if results == 'no results found' %}



Loading…
Cancel
Save