Parcourir la source

added 'country' field to Solr and data route using Chart.js

solr_update
Simon Bowie il y a 2 ans
Parent
révision
20ca7e2f9c
8 fichiers modifiés avec 234 ajouts et 16 suppressions
  1. +11
    -0
      solr_config/schema.xml
  2. +4
    -0
      web/app/__init__.py
  3. +55
    -0
      web/app/data.py
  4. +20
    -15
      web/app/search.py
  5. +32
    -1
      web/app/solr.py
  6. +3
    -0
      web/app/templates/base.html
  7. +108
    -0
      web/app/templates/data.html
  8. +1
    -0
      web/requirements.txt

+ 11
- 0
solr_config/schema.xml Voir le fichier

@@ -527,4 +527,15 @@
</fieldType>
<!-- END -->

<!-- ADDED BY SIMON BOWIE 2022-08-14 -->
<!-- Feed the text of the 'content' field into 'country' so that the analyzer of the
     'country' field type below runs over it at index time. -->
<copyField source="content" dest="country"/>
<field name="country" type="country" indexed="true" stored="true"/>

<!-- Field type whose only analysis step is a pattern tokenizer. With group="1" the emitted
     token is the first capture group of the pattern: the two word characters that come after
     "FT=D", a run of non-whitespace characters and a single whitespace character.
     NOTE(review): presumably this is the two-letter country prefix of the document
     reference; confirm against the indexed content. -->
<fieldType name="country" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.PatternTokenizerFactory" pattern="FT=D[^\s]*\s(\w{2})" group="1" />
</analyzer>
</fieldType>
<!-- END -->

</schema>

+ 4
- 0
web/app/__init__.py Voir le fichier

@@ -31,4 +31,8 @@ def create_app():
from .random import random as random_blueprint
app.register_blueprint(random_blueprint)

# blueprint for data parts of app
from .data import data as data_blueprint
app.register_blueprint(data_blueprint)

return app

+ 55
- 0
web/app/data.py Voir le fichier

@@ -0,0 +1,55 @@
# @name: data.py
# @creation_date: 2022-09-14
# @license: The MIT License <https://opensource.org/licenses/MIT>
# @author: Simon Bowie <ad7588@coventry.ac.uk>
# @purpose: route for the data page, which charts the number of records per year and per country
# @acknowledgements:
#

from flask import Blueprint, render_template, request
import random
import pycountry
from . import solr

data = Blueprint('data', __name__)

def _chart_dataset(numbers):
    # Build the single Chart.js dataset for a bar chart, with one random bar colour.
    random_colour = "#" + "%06x" % random.randint(0, 0xFFFFFF)
    return {"label": "number of records", "data": numbers, "backgroundColor": random_colour}


def _country_label(code):
    # Turn a two-letter code into a country name, falling back to the raw code.
    country = pycountry.countries.get(alpha_2=code)
    if country is None:
        country = pycountry.historic_countries.get(alpha_2=code)
    # The indexed term is not guaranteed to be a real country code, so both
    # lookups can miss; show the raw term rather than failing the whole page.
    if country is None:
        return code
    return country.name


# route for main data page
@data.route('/data/')
def main_data():
    """Render the data page: total record count plus per-year and per-country charts.

    The term lists returned by ``solr.get_term_data`` are read as flat lists
    that alternate term and count (even index = term, odd index = count).

    Returns:
        The rendered ``data.html`` template. Each ``*_labels`` list holds the
        chart's x-axis labels and each ``*_dataset`` list holds exactly one
        Chart.js dataset dict, even when Solr returned no terms.
    """
    core = 'all'
    total_number = solr.get_total_number(core)
    year_data = solr.get_term_data('year', core)
    country_data = solr.get_term_data('country', core)

    # parse all the year data
    year_labels = list(year_data[0::2])
    year_numbers = list(year_data[1::2])
    # built unconditionally so an empty term list yields an empty chart
    # instead of a NameError
    year_dataset = [_chart_dataset(year_numbers)]

    # parse all the country data
    country_labels = [_country_label(code) for code in country_data[0::2]]
    country_numbers = list(country_data[1::2])
    country_dataset = [_chart_dataset(country_numbers)]

    # kept so the template context stays exactly as before
    germany = pycountry.countries.get(alpha_2='DE')

    return render_template('data.html', total_number=total_number, year_data=year_data, year_labels=year_labels, year_dataset=year_dataset, country_data=country_data, country_labels=country_labels, country_dataset=country_dataset, germany=germany)

+ 20
- 15
web/app/search.py Voir le fichier

@@ -6,32 +6,37 @@
# @acknowledgements:
# https://www.digitalocean.com/community/tutorials/how-to-add-authentication-to-your-app-with-flask-login

from flask import Blueprint, render_template, request
from flask import Blueprint, render_template, request, redirect, url_for
from . import solr
from . import ops

search = Blueprint('search', __name__)

# route for search page
@search.route('/search/', methods=['POST'])
@search.route('/search/', methods=['GET', 'POST'])
def basic_search():
search = request.form.get('search')
if request.form.get('core') is not None:
core = request.form.get('core')
if request.method == 'POST':
search = request.form.get('search')
if request.form.get('core') is not None:
core = request.form.get('core')
else:
core = 'all'
if request.form.get('sort') is not None:
sort = request.form.get('sort')
else:
sort = 'relevance'
search_results = solr.solr_search(core, sort, search)
results = search_results[0]
num_found = search_results[1]
return render_template('search.html', results=results, num_found=num_found, search=search, core=core, sort=sort)
else:
core = 'all'
if request.form.get('sort') is not None:
sort = request.form.get('sort')
else:
sort = 'relevance'
search_results = solr.solr_search(core, sort, search)
results = search_results[0]
num_found = search_results[1]
return render_template('search.html', results=results, num_found=num_found, search=search, core=core, sort=sort)
return redirect(url_for('main.index'))

# route for id_search page
@search.route('/search/id/')
@search.route('/search/id/', methods=['GET'])
def id_search():
if request.args.get('id') is None:
return redirect(url_for('main.index'))
if request.args.get('core') is not None:
core = request.args.get('core')
else:

+ 32
- 1
web/app/solr.py Voir le fichier

@@ -58,7 +58,10 @@ def parse_result(id, input):
doc_ref = re.search('=D\s(([^\s]*)\s([^\s]*)\s([^\s]*))', input)
if doc_ref is None:
doc_ref = re.search('=D&locale=en_EP\s(([^\s]*)\s([^\s]*)\s([^\s]*))', input)
output['doc_ref'] = doc_ref.group(1).replace(" ","")
if doc_ref is None:
output['doc_ref'] = ""
else:
output['doc_ref'] = doc_ref.group(1).replace(" ","")
else:
output['doc_ref'] = doc_ref.group(1).replace(" ","")

@@ -144,3 +147,31 @@ def get_ten_random_images():
output.append(result)
i += 1
return output

def get_total_number(core):
    """Return the total number of documents in a Solr core.

    Args:
        core: name of the Solr core to query.

    Returns:
        The ``numFound`` value of a match-all (``*:*``) query against the core.

    Raises:
        requests.RequestException: if Solr cannot be reached, does not answer
            within the timeout, or answers with an HTTP error status.
    """
    # Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
    # rows=0: only the hit count is read, so Solr does not need to return any documents
    solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/select?q.op=OR&q=*:*&rows=0&wt=json'

    # get result; the timeout stops a stalled Solr from blocking the web worker forever
    response = requests.get(solrurl, timeout=30)
    # fail with a clear HTTP error instead of a KeyError on the error payload
    response.raise_for_status()

    # turn the API response into useful Json
    payload = response.json()

    return payload['response']['numFound']

def get_term_data(field, core):
    """Return the indexed terms of one field from Solr's ``/terms`` handler.

    Args:
        field: name of the Solr field whose terms are requested.
        core: name of the Solr core to query.

    Returns:
        Whatever Solr puts under ``terms[field]`` in its JSON response.
        NOTE(review): the caller in data.py reads this as a flat list
        alternating term and count; confirm against the Solr response format.

    Raises:
        requests.RequestException: if Solr cannot be reached, does not answer
            within the timeout, or answers with an HTTP error status.
    """
    # Assemble a query string to send to Solr. This uses the Solr hostname from config.env. Solr's query syntax can be found at many sites including https://lucene.apache.org/solr/guide/6_6/the-standard-query-parser.html
    # terms.limit=1000 caps the number of terms requested from Solr
    solrurl = 'http://' + solr_hostname + ':' + solr_port + '/solr/' + core + '/terms?terms.fl=' + field + '&wt=json&terms.limit=1000'

    # get result; the timeout stops a stalled Solr from blocking the web worker forever
    response = requests.get(solrurl, timeout=30)
    # fail with a clear HTTP error instead of a KeyError on the error payload
    response.raise_for_status()

    # turn the API response into useful Json
    payload = response.json()

    return payload['terms'][field]

+ 3
- 0
web/app/templates/base.html Voir le fichier

@@ -26,6 +26,9 @@
<script src="https://code.jquery.com/jquery-3.6.0.js" integrity="sha256-H+K7U5CnXl1h5ywQfKtSj8PCmoN9aaq30gDh27Xc0jk=" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-ka7Sk0Gln4gmtz2MlQnikT1wXgYsOg+OMhuP+IlRH9sENBO0LRn5q+8nbTov4+1p" crossorigin="anonymous"></script>
<script src="{{ url_for('static',filename='js/main.js') }}"></script>
<!-- JavaScript for Chart.js -->
<script src="https://cdn.jsdelivr.net/npm/chart.js@3.8.0/dist/chart.min.js"></script>

</head>

{% block body %}

+ 108
- 0
web/app/templates/data.html Voir le fichier

@@ -0,0 +1,108 @@
{% extends "base.html" %}

{% block content %}

<a href="{{ url_for('main.index') }}" class="h1 text-left"> ⇽ </a>

There are a total of {{ total_number }} patents.

<br><br>

<canvas id="chartOfYearFrequency" width="600" height="300"></canvas>
<canvas id="chartOfCountryFrequency" width="600" height="300"></canvas>

<script type="text/javascript">
  // Bar chart: number of patent records per year.
  // retrieve variables passed from Python; tojson serialises them as valid,
  // script-safe JavaScript literals instead of printing the Python repr unescaped
  var year_labels = {{ year_labels|tojson }};
  var year_dataset = {{ year_dataset|tojson }};

  // set up chart
  var data = {
    labels: year_labels,
    datasets: year_dataset,
  };

  var config = {
    type: 'bar',
    data: data,
    options: {
      plugins: {
        title: {
          display: true,
          text: 'number of patent records for each year'
        },
      },
      responsive: true,
      scales: {
        x: {
          title: {
            display: true,
            text: 'year'
          },
          stacked: true,
        },
        y: {
          title: {
            display: true,
            text: 'number of records'
          },
          stacked: true
        }
      }
    }
  };

  // draw the chart on its canvas
  var yearGraph = new Chart(
    document.getElementById('chartOfYearFrequency'),
    config
  );
</script>

<script type="text/javascript">
  // Bar chart: number of patent records per country.
  // retrieve variables passed from Python; tojson serialises them as valid,
  // script-safe JavaScript literals instead of printing the Python repr unescaped
  var country_labels = {{ country_labels|tojson }};
  var country_dataset = {{ country_dataset|tojson }};

  // set up chart
  var data = {
    labels: country_labels,
    datasets: country_dataset,
  };

  var config = {
    type: 'bar',
    data: data,
    options: {
      plugins: {
        title: {
          display: true,
          text: 'number of patent records from each country'
        },
      },
      responsive: true,
      scales: {
        x: {
          title: {
            display: true,
            text: 'country'
          },
          stacked: true,
        },
        y: {
          title: {
            display: true,
            text: 'number of records'
          },
          stacked: true
        }
      }
    }
  };

  // draw the chart on its canvas; it gets its own variable so it no longer
  // overwrites the handle of the year chart
  var countryGraph = new Chart(
    document.getElementById('chartOfCountryFrequency'),
    config
  );
</script>

{% endblock %}

+ 1
- 0
web/requirements.txt Voir le fichier

@@ -4,3 +4,4 @@ gunicorn
markdown
requests
Wand
pycountry

Chargement…
Annuler
Enregistrer