when I can extract the content from list of URLs and then store the contents in text files , the problem is it is my python code is reading only last url link from the text file and store only those contents. Here I am using the goose extraction tool to pull some text from URLs
Can Help me out with this (any problem with for loop here ??)
class FetchUrl(Thread):
def __init__(self, url, name):
Thread.__init__(self)
self.name = name
self.url = url
def run(self):
config = Configuration()
config.browser_user_agent = 'Mozilla 5.0'
config.http_timeout = 20
g = Goose(config)
fname = os.path.basename(self.name)
with open(fname +".txt","w+") as f_handler:
for tmp in url:
article = g.extract(url=tmp)
contents = article.cleaned_text
f_handler.write(contents)
msg = "%s was finished downloaded with this link %s!" % (self.name,
self.url)
print(msg)
def main(url):
for item , url in enumerate(url):
name = "Thread %s" % (item+1)
fetch = FetchUrl(url, name)
fetch.start()
if __name__ == "__main__":
u_path = 'url_list/url.txt'
url = []
for line in open(u_path):
line = line.strip()
url.append(line)
print(line)
main(url)