I am reading a news article and POS-tagging it with NLTK. I want to remove the lines that do not contain a token with a POS tag such as CD (numbers).
import io
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk import pos_tag
# English stopwords from the NLTK corpus, kept as a set
# (not used further in this snippet).
stop_words = set(stopwords.words('english'))

# Read the whole article. The context manager closes the file even if
# read() raises, which the manual open()/close() pair did not guarantee.
with open("etorg.txt") as file1:
    line = file1.read()
print(line)

# Tag the entire text as one flat list of whitespace-separated tokens.
words = line.split()
tokens = nltk.pos_tag(words)

# Tagging the whole text at once loses the sentence boundaries, so a flat
# list of (word, tag) pairs cannot tell which sentence a CD tag belongs to.
# Split the text into sentences first, tag each sentence on its own, and
# keep only the sentences in which at least one token is tagged CD (numbers).
# NOTE: sent_tokenize/word_tokenize need NLTK's Punkt tokenizer data to be
# installed (via nltk.download) in addition to the tagger model.
cd_sentences = [
    sentence
    for sentence in nltk.sent_tokenize(line)
    if any(tag == "CD" for _, tag in pos_tag(word_tokenize(sentence)))
]
How do I remove all sentences that do not contain the CD tag?