Browse Source

WIP: counting how many words are in abstracts

solr_update
Simon Bowie 2 years ago
parent
commit
44caddce51
2 changed files with 23 additions and 1 deletions
  1. +1
    -1
      web/app/templates/abstracts.html
  2. +22
    -0
      word_count.py

+ 1
- 1
web/app/templates/abstracts.html View File

@@ -10,7 +10,7 @@
<div class="row">
<div class="col m-5">
<p class="h1 text-center">Reading Fragments</p>
<p class="mt-2 text-center">a <span id="time"></span> minute read out of 6375081 minutes reading time</p>
<p class="mt-2 text-center">a <span id="time"></span> minute read out of 662781 minutes reading time</p>
<a href="{{ url_for('main.index') }}" class="h1 text-left"> ⇽ </a>
</div>
</div>

+ 22
- 0
word_count.py View File

@@ -0,0 +1,22 @@
# import required modules
import os
from striprtf.striprtf import rtf_to_text

# Root directory that is searched (recursively) for .rtf files.
directory = 'data/POP_Dataset_2022'


def count_words(top):
    """Return the total number of whitespace-separated tokens in .rtf files.

    Walks *top* recursively with ``os.walk`` and, for every file whose name
    ends in ``.rtf``, reads the file as text and adds the number of tokens
    produced by ``str.split()`` to a running total.

    Args:
        top: Path of the directory tree to scan. A path that does not exist
            yields a count of 0, because ``os.walk`` ignores listing errors
            by default.

    Returns:
        The summed token count over all matching files.
    """
    word_total = 0
    for root, _dirs, files in os.walk(top):
        for filename in files:
            # Suffix match: a substring test would also pick up names such
            # as "x.rtf.bak".
            if not filename.endswith('.rtf'):
                continue
            path = os.path.join(root, filename)
            # The context manager closes each file promptly; undecodable
            # bytes are replaced so one bad file cannot abort the whole run.
            with open(path, "rt", errors="replace") as handle:
                content = handle.read()
            # TODO(WIP): the count is taken over the raw RTF source, so RTF
            # control words and braces are counted as well. Converting with
            # rtf_to_text(content) before splitting is not enabled yet.
            word_total += len(content.split())
    return word_total


total = count_words(directory)

print(total)

Loading…
Cancel
Save