Use pandas to fetch and manipulate the table. The links are static, meaning they can be reconstructed from each offender's first and last name. You can then use requests and BeautifulSoup to scrape each offender's last statement, some of which are quite moving.
Here's how:
import requests
import pandas as pd


def clean(first_and_last_name: list) -> str:
    # Strip suffixes and apostrophes *before* lowercasing and removing
    # spaces, otherwise ", Jr." and ", Sr." would never match.
    name = "".join(first_and_last_name)
    for junk in (", Jr.", ", Sr.", "'"):
        name = name.replace(junk, "")
    return name.replace(" ", "").lower()


base_url = "https://www.tdcj.texas.gov/death_row"
response = requests.get(f"{base_url}/dr_executed_offenders.html")

# read_html() returns a list of DataFrames, one per <table> on the page
tables = pd.read_html(response.text, flavor="bs4")
df = pd.concat(tables)
df.rename(
    columns={"Link": "Offender Information", "Link.1": "Last Statement URL"},
    inplace=True,
)

# Rebuild the static per-offender URLs from the name columns
df["Offender Information"] = df[
    ["Last Name", "First Name"]
].apply(lambda x: f"{base_url}/dr_info/{clean(x)}.html", axis=1)
df["Last Statement URL"] = df[
    ["Last Name", "First Name"]
].apply(lambda x: f"{base_url}/dr_info/{clean(x)}last.html", axis=1)

df.to_csv("offenders.csv", index=False)
This gets you a CSV with each offender's table data plus two new columns: a link to their offender information page and a link to their last statement.
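For example (using a made-up name purely to show the pattern), clean(["Smith", "John"]) returns "smithjohn", so the two generated links would be:

https://www.tdcj.texas.gov/death_row/dr_info/smithjohn.html
https://www.tdcj.texas.gov/death_row/dr_info/smithjohnlast.html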
EDIT:
I actually went ahead and added the code that fetches all offenders' last statements.
import random
import time

import pandas as pd
import requests
from lxml import html

base_url = "https://www.tdcj.texas.gov/death_row"
response = requests.get(f"{base_url}/dr_executed_offenders.html")

# The last statement is the sixth paragraph on the statement page
statement_xpath = '//*[@id="content_right"]/p[6]/text()'


def clean(first_and_last_name: list) -> str:
    # Strip suffixes and apostrophes *before* lowercasing and removing
    # spaces, otherwise ", Jr." and ", Sr." would never match.
    name = "".join(first_and_last_name)
    for junk in (", Jr.", ", Sr.", "'"):
        name = name.replace(junk, "")
    return name.replace(" ", "").lower()


def get_last_statement(statement_url: str) -> str:
    page = requests.get(statement_url).text
    statement = html.fromstring(page).xpath(statement_xpath)
    # Some offenders made no statement; fall back to an empty string
    text = next(iter(statement), "")
    return " ".join(text.split())


tables = pd.read_html(response.text, flavor="bs4")
df = pd.concat(tables)
df.rename(
    columns={"Link": "Offender Information", "Link.1": "Last Statement URL"},
    inplace=True,
)

df["Offender Information"] = df[
    ["Last Name", "First Name"]
].apply(lambda x: f"{base_url}/dr_info/{clean(x)}.html", axis=1)
df["Last Statement URL"] = df[
    ["Last Name", "First Name"]
].apply(lambda x: f"{base_url}/dr_info/{clean(x)}last.html", axis=1)

offender_data = list(
    zip(
        df["First Name"],
        df["Last Name"],
        df["Last Statement URL"],
    )
)

statements = []
for item in offender_data:
    *names, url = item
    print(f"Fetching statement for {' '.join(names)}...")
    statements.append(get_last_statement(statement_url=url))
    # Be polite to the server between requests
    time.sleep(random.randint(1, 4))

df["Last Statement"] = statements
df.to_csv("offenders_data.csv", index=False)
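One caveat: statement_xpath hard-codes the statement as the sixth <p> of the page. If you want to be more defensive, here's a rough sketch of an alternative get_last_statement; it assumes the pages label the statement with a "Last Statement:" paragraph (verify that against a few pages) and also skips pages that don't return 200:

def get_last_statement_defensive(statement_url: str) -> str:
    # Hypothetical variant: check the HTTP status, try the known xpath,
    # then fall back to the paragraph after the "Last Statement" label.
    response = requests.get(statement_url)
    if response.status_code != 200:
        return ""
    tree = html.fromstring(response.text)
    statement = tree.xpath(statement_xpath)
    if statement:
        return " ".join(statement[0].split())
    paragraphs = tree.xpath('//*[@id="content_right"]/p/text()')
    for i, text in enumerate(paragraphs):
        if "Last Statement" in text and i + 1 < len(paragraphs):
            return " ".join(paragraphs[i + 1].split())
    return ""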
Whichever version you run, it will take a couple of minutes, because the code sleeps for between 1 and 4 seconds after each request so the server doesn't get abused. Once it's done, you'll end up with a .csv file with all offenders' data and their last statement, if there was one.
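From there it's easy to load everything back for analysis; a quick sketch:

import pandas as pd

df = pd.read_csv("offenders_data.csv")
# Empty statements come back as NaN after the CSV round-trip
spoke = df[df["Last Statement"].notna()]
print(spoke[["First Name", "Last Name", "Last Statement"]].head())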