When I ran this code on Jupyter and Virtual Machine, it ran smoothly. But when I started running in on AWS, it always shows list index out of range. I would like to know how to fix this problem. Thanks!
Code:
from datetime import datetime, timedelta
from time import strptime
import requests
from lxml import html
import re
import time
import os
import sys
from pandas import DataFrame
import numpy as np
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import create_engine
from sqlalchemy.sql import text as sa_text
import pymysql
date_list=[]
for i in range(0,2):
duration=datetime.today() - timedelta(days=i)
forma=duration.strftime("%m-%d")
date_list.append(forma)
print(date_list)
def curl_topic_url_hot():
url = 'https://www.xxxx.com/topiclist.php?f=397&p=1'
headers = {'User-Agent': 'aaaaaaaaaaaaaaa'}
response = requests.get(url, headers=headers)
tree = html.fromstring(response.text)
output = tree.xpath("//div[@class='pagination']/a[7]")
maxPage = int(output[0].text)
print('There are', maxPage, 'pages.')
return [maxPage]
topic_url_hot = curl_topic_url_hot()
AWS log:
['02-12', '02-11']
Traceback (most recent call last):
File "/home/hadoop/ellen_crawl/test0211_mobile.py", line 167, in <module>
topic_url_hot = curl_topic_url_hot()
File "/home/hadoop/ellen_crawl/test0211_mobile.py", line 48, in curl_topic_url_hot
maxPage = int(output[0].text)
IndexError: list index out of range
When I ran this code on Jupyter, it shows:
['02-12', '02-11']
There are 818 pages.