Browse Source

improvements to results parsing

solr_update
Simon Bowie 2 years ago
parent
commit
0d913916e0
1 changed file with 14 additions and 5 deletions
  1. +14
    -5
      web/app/solr.py

+ 14
- 5
web/app/solr.py View File

@@ -109,16 +109,23 @@ def parse_result(id, input):
output['ipc_publication_url'] = ipc_publication.group(1)

# search for the title in the content element and display it
title = re.search('Title.*\n(.*)\n', input)
title = re.search('Title.*?\\n(.*?)\\n|Tile.?\\n(.*?)\\n', input)
if title is not None:
output['title'] = title.group(1)
if title.group(1) is not None:
output['title'] = title.group(1)
else:
output['title'] = title.group(2)

# search for the abstract in the content element and display it
abstract = re.search('Abstract.*\n(.*)\n', input)
if abstract is None:
abstract = re.search('\(.\) \\n\\n(.*)\\n', input)
if abstract is not None:
output['abstract'] = abstract.group(1);
if abstract.group(1) is not None:
output['abstract'] = abstract.group(1)
else:
abstract = re.search('\(.*?\) (\\n\\n\\n\\n|\\n\\n\\n|\\n\\n)(.*)\\n', input)
if abstract is not None:
if abstract.group(2) is not None:
output['abstract'] = abstract.group(2)

# search for the year in the content element and display it
year = re.search('=D[^\s]*\s[^\s]*\s[^\s]*\s[^\s]*\s(\d{4})', input)
@@ -137,6 +144,8 @@ def parse_result(id, input):
else:
output['country'] = country_code.group(1)

output['raw'] = input

return output

def get_random_record(core):

Loading…
Cancel
Save