| <div class="row"> | <div class="row"> | ||||
| <div class="col m-5"> | <div class="col m-5"> | ||||
| <p class="h1 text-center">Reading Fragments</p> | <p class="h1 text-center">Reading Fragments</p> | ||||
| <p class="mt-2 text-center">a <span id="time"></span> minute read out of 6375081 minutes reading time</p> | |||||
| <p class="mt-2 text-center">a <span id="time"></span> minute read out of 662781 minutes reading time</p> | |||||
| <a href="{{ url_for('main.index') }}" class="h1 text-left"> ⇽ </a> | <a href="{{ url_for('main.index') }}" class="h1 text-left"> ⇽ </a> | ||||
| </div> | </div> | ||||
| </div> | </div> |
| # import required modules | |||||
| import os | |||||
| from striprtf.striprtf import rtf_to_text | |||||
def count_rtf_words(directory):
    """Return the number of whitespace-separated tokens in all RTF files.

    Walks *directory* recursively and, for every file whose name ends in
    ``.rtf``, reads it as text and counts the tokens produced by
    ``str.split()``.

    NOTE: the raw file content is counted as-is; it is not converted to
    plain text first, so RTF markup contributes to the total.

    Args:
        directory: Path of the directory tree to scan.

    Returns:
        The summed token count as an ``int``. ``os.walk`` ignores listing
        errors by default, so a missing directory yields ``0``.
    """
    word_count = 0
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            # Match on the real extension: a substring test would also pick
            # up names such as "notes.rtf.bak".
            if not filename.endswith('.rtf'):
                continue
            path = os.path.join(root, filename)
            # The context manager closes each handle promptly instead of
            # leaking one open file per document in the dataset.
            with open(path, "rt") as handle:
                content = handle.read()
            word_count += len(content.split())
    return word_count


# assign directory
directory = 'data/POP_Dataset_2022'
# total token count across every RTF file below that directory
total = count_rtf_words(directory)
print(total)