| @@ -10,7 +10,7 @@ | |||
| <div class="row"> | |||
| <div class="col m-5"> | |||
| <p class="h1 text-center">Reading Fragments</p> | |||
| <p class="mt-2 text-center">a <span id="time"></span> minute read out of 6375081 minutes reading time</p> | |||
| <p class="mt-2 text-center">a <span id="time"></span> minute read out of 662781 minutes reading time</p> | |||
| <a href="{{ url_for('main.index') }}" class="h1 text-left"> ⇽ </a> | |||
| </div> | |||
| </div> | |||
| @@ -0,0 +1,22 @@ | |||
| # import required modules | |||
| import os | |||
| from striprtf.striprtf import rtf_to_text | |||
def count_words(directory):
    """Return the total number of whitespace-separated tokens in all .rtf files.

    The tree rooted at ``directory`` is walked recursively. Every file whose
    name ends with ``.rtf`` is read as text and split on whitespace; the
    token counts are summed.

    NOTE(review): the raw file contents are counted, i.e. RTF control words
    and markup are included in the total. Converting with ``rtf_to_text``
    first would count only the visible text -- confirm which is intended.

    Args:
        directory: Path of the root directory to scan.

    Returns:
        The summed token count as an int (0 when no .rtf file is found or
        the directory does not exist, because ``os.walk`` yields nothing).
    """
    word_total = 0
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            # Match on the suffix so names such as "x.rtf.bak" are skipped.
            if not filename.endswith('.rtf'):
                continue
            path = os.path.join(root, filename)
            # The context manager closes each handle; the tree may hold
            # many files and leaked handles would accumulate.
            with open(path, "rt") as handle:
                word_total += len(handle.read().split())
    return word_total


# Root directory of the dataset to scan.
directory = 'data/POP_Dataset_2022'
total = count_words(directory)
print(total)