When i was trying to use get_reviews fn it is giving me error at f.read.decode()
saying str
has no function decode and when I removed .decode()
it again giving me error which I specify down the 1st error.
def get_reviews(dirname,positive=True ):
label = 1 if positive else 0
reviews = []
for filename in os.listdir(dirname):
if filename.endswith(".txt"):
with open(dirname + filename,"r+") as f:
review = f.read().decode('utf-8')#we decoding text as utf 8
review = review.lower().replace("<br />"," ")
review = re.sub(token_regex,"",review)
#returning a tuple of the reviews text and lable for
#wheather it a positive or negative review
reviews.append([review,label])
return reviews
error:
AttributeError Traceback (most recent call last)
<ipython-input-6-92e2ebb79bdf> in <module>()
----> 1 positive_reviews,negative_reviews=extract_reviews()
<ipython-input-5-233b24b569a3> in extract_reviews()
22 tar.extractall()
23 tar.close()
---> 24 positive_reviews = get_reviews("aclimdb/train/pos/",positive = True)
25 negative_reviews = get_reviews("aclimdb/train/neg/",positive=False)
26
<ipython-input-5-233b24b569a3> in get_reviews(dirname, positive)
7 with open(dirname + filename,"r+") as f:
8
----> 9 review = f.read().decode('utf-8')#we decoding text as utf 8
10 review = review.lower().replace("<br />"," ")#converting it to lower case and removing spaces
11 review = re.sub(token_regex,"",review) #and surbbing the sentenses having special characters
AttributeError: 'str' object has no attribute 'decode'
It is the second error i got when i try to remove .decode() and if remove .decode() then it gives me following error.
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-8-92e2ebb79bdf> in <module>()
----> 1 positive_reviews,negative_reviews=extract_reviews()
<ipython-input-7-6a7844747fcf> in extract_reviews()
22 tar.extractall()
23 tar.close()
---> 24 positive_reviews = get_reviews("aclimdb/train/pos/",positive = True)
25 negative_reviews = get_reviews("aclimdb/train/neg/",positive=False)
26
<ipython-input-7-6a7844747fcf> in get_reviews(dirname, positive)
7 with open(dirname + filename,"r+") as f:
8
----> 9 review = f.read()#we decoding text as utf 8
10 review = review.lower().replace("<br />"," ")#converting it to lower case and removing spaces
11 review = re.sub(token_regex,"",review) #and surbbing the sentenses having special characters
C:\ProgramData\Anaconda3\lib\encodings\cp1252.py in decode(self, input, final)
21 class IncrementalDecoder(codecs.IncrementalDecoder):
22 def decode(self, input, final=False):
---> 23 return codecs.charmap_decode(input,self.errors,decoding_table)[0]
24
25 class StreamWriter(Codec,codecs.StreamWriter):
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 803: character maps to <undefined>