from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import bs4
import datetime
import time
#options = Options()
#options.add_argument("--headless")
#driver = webdriver.Firefox(firefox_options=options)
driver = webdriver.Firefox()
driver.get("https://www.rankonesport.com/Calendar/?D=e8bb5c10-8d0c-4b26-
b304-262397124de8")
weekly = driver.find_element_by_id("cmd_Weekly").click()
source = driver.page_source
bs_source = bs4.BeautifulSoup(source, "lxml")
month = datetime.date.today().month
year_end = 5
total = 12
times = 0
if month <= year_end:
times = year_end - month
if month == year_end:
times = 1
if month >= year_end:
value = month - year_end
times = total - value
times *= 5
mylist = []
#{EventName:[Date, Where, Time(Start), Time(End)]}
mydict = {}
for x in range(times):
events = bs_source.find('table', id='gv_Events')
for tr in events.find_all('tr', class_='lightgray'):
td = tr.find_all('td')
mylist.append(td)
for tr2 in events.find_all('tr', class_='white'):
td2 = tr2.find_all('td')
mylist.append(td2)
next = driver.find_element_by_id('lnk_Next_Day').click()
for event in mylist:
mydict.update({event[0].text: [event[2].text, event[1].text,
event[3].text, event[4].text]})
print(mylist)
print(mydict)
So my school has an online calendar that I am trying to scrape off of. My goal is to pull each event, that happens before the school year ends, and their corresponding properties such as time and date.
I have the script loop through the calendar portion that has the events by week and pull them off. The calendar is a JS based calendar so the link does not change when the script goes and clicks the next button. I store the events and their properties in a list and then throw them into a dictionary in order to easily access them by name.
What I want to happen is the dictionary to be full of as many events as the script loops through. Rather the dictionary only contains a select few which seem to be the first couple of events it parses through. The events have the same HTML ids and classes when the next page is pulled up so it should just rinse and repeat the code as many times as I have it.
If someone could point something out that I missed or lead me in the right direction that would be awesome as I have spent way to much time trying to figure this out myself.
Links:
Dictionary Output:
{'Sadie Ticket Sales': ['3/1/2018', 'New Cafeteria, 541 Chartres St. LaSalle, Lasalle, IL 61301', '11:00 AM', '1:00 PM'],
'Winter Guard Practice': ['3/3/2018', ' East Gym, 541 Chartres St. LaSalle, Lasalle, IL 61301', '5:00 PM', '8:00 PM'],
'Sadie Dance': ['3/3/2018', 'Sellett Gym, 541 Chartres St. LaSalle, Lasalle, IL 61301', '8:00 PM', '11:00 PM']}
^Should be way, way more events
List output:
[[<td>Sadie Ticket Sales</td>, <td>New Cafeteria, 541 Chartres St. LaSalle, Lasalle, IL 61301</td>, <td>2/26/2018</td>, <td>11:00 AM</td>, <td>1:00 PM</td>, <td>Non-Game Activity</td>, <td align="center"><a href="javascript:__doPostBack('gv_Events','Outlook$0')">Sync</a></td>],
[<td>Winter Guard Practice</td>, <td>North Balcony, 541 Chartres St. LaSalle, Lasalle, IL 61301</td>, <td>2/27/2018</td>, <td>6:30 PM</td>, <td>9:00 PM</td>, <td>Non-Game Activity</td>, <td align="center"><a href="javascript:__doPostBack('gv_Events','Outlook$2')">Sync</a></td>],
...]
It seems to repeat those events over and over in the list ^
Thanks.
Edit 1:
mylist = []
#{EventName:[Date, Where, Time(Start), Time(End)]}
mydict = {}
for x in range(5):
source = driver.page_source
bs_source = bs4.BeautifulSoup(source, 'lxml')
events = bs_source.find('table', id='gv_Events')
for tr in events.find_all('tr', class_='lightgray'):
td = tr.find_all('td')
mylist.append(td)
for tr2 in events.find_all('tr', class_='white'):
td2 = tr2.find_all('td')
mylist.append(td2)
next = driver.find_element_by_id('lnk_Next_Day').click()
for event in mylist:
mydict.update({event[0].text: [event[2].text, event[1].text,
event[3].text, event[4].text]})