@@ -10,7 +10,7 @@ | |||
<div class="row"> | |||
<div class="col m-5"> | |||
<p class="h1 text-center">Reading Fragments</p> | |||
<p class="mt-2 text-center">a <span id="time"></span> minute read out of 6375081 minutes reading time</p> | |||
<p class="mt-2 text-center">a <span id="time"></span> minute read out of 662781 minutes reading time</p> | |||
<a href="{{ url_for('main.index') }}" class="h1 text-left"> ⇽ </a> | |||
</div> | |||
</div> |
@@ -0,0 +1,22 @@ | |||
# import required modules | |||
import os | |||
from striprtf.striprtf import rtf_to_text | |||
def count_rtf_words(directory):
    """Return the number of whitespace-separated tokens in all RTF files.

    Walks *directory* recursively and, for every file whose name ends in
    ``.rtf``, reads the file as text and counts the tokens produced by
    ``str.split()``.

    NOTE: the raw file content is counted as-is; it is not converted to
    plain text first, so RTF markup tokens contribute to the total.

    Args:
        directory: Path of the directory tree to scan.

    Returns:
        The summed token count over all matching files. A directory that
        does not exist contributes nothing, because ``os.walk`` ignores
        listing errors by default, so the result is 0.
    """
    word_total = 0
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            # Suffix test: a substring test would also pick up names such
            # as "notes.rtf.bak".
            if not filename.endswith('.rtf'):
                continue
            path = os.path.join(root, filename)
            # Context manager closes each handle; the tree may hold many files.
            with open(path, "rt") as rtf_file:
                word_total += len(rtf_file.read().split())
    return word_total


# assign directory
directory = 'data/POP_Dataset_2022'
# total token count across every RTF file below `directory`
total = count_rtf_words(directory)
print(total)