<div class="row"> | <div class="row"> | ||||
<div class="col m-5"> | <div class="col m-5"> | ||||
<p class="h1 text-center">Reading Fragments</p> | <p class="h1 text-center">Reading Fragments</p> | ||||
<p class="mt-2 text-center">a <span id="time"></span> minute read out of 6375081 minutes reading time</p> | |||||
<p class="mt-2 text-center">a <span id="time"></span> minute read out of 662781 minutes reading time</p> | |||||
<a href="{{ url_for('main.index') }}" class="h1 text-left"> ⇽ </a> | <a href="{{ url_for('main.index') }}" class="h1 text-left"> ⇽ </a> | ||||
</div> | </div> | ||||
</div> | </div> |
# import required modules | |||||
import os | |||||
from striprtf.striprtf import rtf_to_text | |||||
def count_rtf_words(directory):
    """Return the total number of whitespace-separated tokens in all
    ``.rtf`` files found under *directory* (searched recursively).

    A directory that does not exist or contains no ``.rtf`` files yields 0,
    because ``os.walk`` simply produces nothing in that case.

    NOTE(review): the count is taken over the raw file content, i.e. RTF
    control words and braces are counted as "words" too. Converting with
    ``rtf_to_text`` first would count only the visible text -- confirm which
    figure is wanted before switching.
    """
    word_count = 0
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            # Match on the extension only; a substring test would also pick
            # up names such as "notes.rtf.bak".
            if not filename.endswith('.rtf'):
                continue
            path = os.path.join(root, filename)
            # The context manager closes each file promptly instead of
            # leaking one open handle per file in the dataset.
            with open(path, "rt") as handle:
                word_count += len(handle.read().split())
    return word_count


# Root directory of the dataset to scan.
directory = 'data/POP_Dataset_2022'
total = count_rtf_words(directory)
print(total)