I am trying to create a Python program that reads two text files, one containing an article and the other containing a list of "stop words" (one word on each line). I would like to determine how many of these "stop words" are in the specific text file I'm using (the cumulative total of the frequency of each "stop word") containing the article.
I tried creating nested for
loops in order to do this where I'm looping through each line of the file containing the article (outer for loop), and within each line, having a for loop (inner for loop) that loops through the list of "stop words", and sees if a "stop word" is in the current line, and if so, how often. At the end, I add how often the word is in the current line to an accumulator that will keep track of the total cumulative amount of stop words found in the file containing the article.
Currently, when I run it, it says there are 0 stop words in the file, which is incorrect.
import string
def main():
analyzed_file = open('LearnToCode_LearnToThink.txt', 'r')
stop_word_file = open('stopwords.txt', 'r')
stop_word_accumulator = 0
for analyzed_line in analyzed_file.readlines():
formatted_line = remove_punctuation(analyzed_line)
for stop_word_line in stop_word_file.readlines():
stop_formatted_line = create_stopword_list(stop_word_line)
if stop_formatted_line in formatted_line:
stop_word_frequency = formatted_line.count(stop_formatted_line)
stop_word_accumulator += stop_word_frequency
print("there are ",stop_word_accumulator, " words")
stop_word_file.close()
analyzed_file.close()
def create_stopword_list(stop_word_text):
clean_words = [] # create an empty list
stop_word_text = stop_word_text.rstrip() # remove trailing whitespace characters
new_words = stop_word_text.split() # create a list of words from the text
for word in new_words: # normalize and add to list
clean_words.append(word.strip(string.punctuation).lower())
return clean_words
def remove_punctuation(text):
clean_words = [] # create an empty list
text = text.rstrip() # remove trailing whitespace characters
words = text.split() # create a list of words from the text
for word in words: # normalize and add to list
clean_words.append(word.strip(string.punctuation).lower())
return clean_words
main()