I'm getting the following error at this line and I'm not sure why... it worked before but somewhere when debugging the code, broke... Any help? Not sure how much code is helpful to post, if this is not enough let me know and I'll update. Basically I'm just trying to extract all the links in this code into the same list from a previously jumbled list.
exceptions.TypeError: 'generator' object has no attribute '__getitem__'
item['playerurl'] = re.findall(r'"[^"]*"',"".join(item['playerurl'])) #used to parse
Edit: item declaration in item file
class TeamStats(Item):
# define the fields for your item here like:
# name = scrapy.Field()
team = Field()
division = Field()
rosterurl = Field()
player_desc = Field()
playerurl = Field()
pass
I'll just post my entire code:
##the above code is for the real run but the below code is just for testing as it hits less pages
division = response.xpath('//div[@id="content"]//div[contains(@class, "mod-teams-list-medium")]')
for team in response.xpath('//div[@id="content"]//div[contains(@class, "mod-teams-list-medium")]'): #goes through all teams in each division
item = TeamStats() #creates new TeamStats item
item['division'] = division.xpath('.//div[contains(@class, "mod-header")]/h4/text()').extract()[0] #extracts the text which represents division, team and roster url
item['team'] = team.xpath('.//h5/a/text()').extract()[0]
item['rosterurl'] = "http://espn.go.com" + team.xpath('.//div/span[2]/a[3]/@href').extract()[0]
request = scrapy.Request(item['rosterurl'], callback = self.parseWPNow) #opens up roster url to parse player data
request.meta['play'] = item
yield request #run the request through parseWPNow
def parseWPNow(self, response): #after each request in parse, this is run
item = response.meta['play'] #current item gets restored through meta tag
item = self.parseRoster(item, response) #goes through and takes basic player data while filling playerurl (needed for next step)
item = self.parsePlayer(item, response) #gets player stats
return item #returns filled item object and on to next item
def parseRoster(self, item, response):
players = Player() #creates player object to be filled
int = 0
for player in response.xpath("//td[@class='sortcell']"): #fills basic player stats in each player object
players['name'] = player.xpath("a/text()").extract()[0]
players['position'] = player.xpath("following-sibling::td[1]/text()").extract()[0]
players['age'] = player.xpath("following-sibling::td[2]/text()").extract()[0]
players['height'] = player.xpath("following-sibling::td[3]/text()").extract()[0]
players['weight'] = player.xpath("following-sibling::td[4]/text()").extract()[0]
players['college'] = player.xpath("following-sibling::td[5]/text()").extract()[0]
players['salary'] = player.xpath("following-sibling::td[6]/text()").extract()[0]
players['height'] = players['height']
yield players
item['playerurl'] = response.xpath("//td[@class='sortcell']/a").extract() #playerurl is important for extracting the data info
yield item
def parsePlayer(self,item,response):
item['playerurl'] = re.findall(r'"[^"]*"',"".join(item['playerurl'])) #used to parse
for each in item['playerurl']: #goes through each player in url and sets up requests1 to extract requests
each = each[1:-1]
each = each[:30]+"gamelog/"+each[30:]
request1 = scrapy.Request(each, callback = self.parsePlayerNow)
yield request1