I'm web scraping this url : http://www.rajtamil.com/category/vijay-tv-shows/
Getting stuck with this error:
movTitle = str(link['title'])
UnicodeEncodeError: 'ascii' codec can't encode character u'\u2013' in position 41: ordinal not in range(128)
Here's my code snippet
rajTamilurl='http://www.rajtamil.com/category/vijay-tv-shows/'
req = urllib2.Request(rajTamilurl)
req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
response = urllib2.urlopen(req)
link=response.read()
response.close()
#Here's what i've tried so far
#link=link.decode('utf-8')
#link=link.encode('utf-8','ignore')
#link=link.decode('ascii', 'ignore')
#soup = BeautifulSoup(link, from_encoding="utf-8")
#soup = BeautifulSoup(link.decode('utf-8','ignore'))
#soup = BeautifulSoup(link, 'html5lib')
#print soup.prettify()
soup = BeautifulSoup(link)
for eachItem in soup.findAll('li'):
for coveritem in eachItem.findAll("div", { "class":"cover" }):
links = coveritem.find_all('a')
for link in links:
print link['title']
movTitle = str(link['title'])
Any pointers ?