1

I'm trying to write a Python script that checks the status text displayed for a specific country (e.g. Ecuador) on this website: https://immi.homeaffairs.gov.au/what-we-do/whm-program/status-of-country-caps.

How can I monitor that specific text and detect when it changes?

Currently, I compare hashes of the page taken before and after a time delay; however, the hash seems to change every time even though nothing changes visually.

# Page that the monitor downloads on every check.
input_website = "https://immi.homeaffairs.gov.au/what-we-do/whm-program/status-of-country-caps"
# Pause between checks; passed to time.sleep(), so the unit is seconds.
time_delay = 60

#Monitor the website
def monitor_website():
    """Poll ``input_website`` forever and e-mail about availability and changes.

    Once per cycle the page is downloaded a single time.  If the download
    fails or the status code is not 200, "The website is DOWN" is sent.
    Otherwise "The website is UP" is sent, followed by "The website CHANGED"
    when the SHA-224 digest of the body differs from the digest of the
    previous successful download.  The loop then sleeps ``time_delay``
    seconds before the next cycle.

    This function never returns.
    """
    previous_hash = None
    while True:
        try:
            # One request yields both the status code and the body; the
            # ``with`` block closes the connection once they are read.
            with urllib.request.urlopen(input_website) as response:
                status = response.getcode()
                body = response.read()
        except OSError:
            # urlopen raises URLError/HTTPError (both OSError subclasses) for
            # unreachable hosts and 4xx/5xx replies instead of returning a
            # status code, so an outage surfaces here.
            status = None
            body = b""

        if status != 200:
            send_email("The website is DOWN")
        else:
            send_email("The website is UP")
            # NOTE: this digest covers the raw response bytes, so markup that
            # differs between requests changes it even when the visible text
            # is the same. Hash only the extracted text of interest to avoid
            # such false alarms.
            current_hash = hashlib.sha224(body).hexdigest()
            if previous_hash is not None and current_hash != previous_hash:
                send_email("The website CHANGED")
            previous_hash = current_hash

        # Sleep on every path so an outage does not become a tight loop.
        time.sleep(time_delay)



Yos
  • 11
  • 1

1 Answers1

0

Could you check it using Beautiful Soup? Search the page's table cells for "Ecuador" and then check whether the next cell is "suspended**".

import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl


# Ignore SSL certificate errors
# NOTE(review): these two overrides turn off hostname and certificate
# verification, so the identity of the server is not checked. Remove them
# if the default context works in your environment.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE


url = "https://immi.homeaffairs.gov.au/what-we-do/whm-program/status-of-country-caps"
# The ``with`` block closes the HTTP response once the body has been read.
with urllib.request.urlopen(url, context=ctx) as response:
    html = response.read()
soup = BeautifulSoup(html, 'html.parser')

# Text of every <td> cell. Zero-width spaces (\u200b) are removed wherever
# they occur, then surrounding whitespace is stripped, so a zero-width space
# hidden behind leading/trailing whitespace can no longer survive.
list_name = [
    tag.get_text().replace('\u200b', '').strip()
    for tag in soup('td')
]

# Map every cell that mentions Ecuador to the cell that follows it.
# Pairing each cell with its successor means a match in the very last cell
# is skipped instead of raising IndexError.
country_status = {}
for name, status in zip(list_name, list_name[1:]):
    if "Ecuador" in name:
        country_status[name] = status
        print(country_status)

# Check website
if "Ecuador" not in country_status:
    # The match above is a substring test while this lookup is exact, so the
    # key can be missing even when a cell mentioned Ecuador.
    print("Could not find a status cell for Ecuador")
elif country_status["Ecuador"] != "suspended**":
    print("Website has changed")
Ejselter
  • 1
  • 2