I wanted to read an excel file in S3 from Glue.
Here's what I've done so far.
import pandas as pd
import awswrangler as wr
import io
ad_request_path = 's3://bucketname/key.xlsx'
df = wr.s3.read_excel(ad_request_path)
OR
bucket_name = 'bucketname'
object_key = 'key.xlsx'
s3_client = boto3.client('s3')
obj = s3_client.get_object(Bucket=bucket_name, Key=object_key)
data = obj['Body'].read()
workbook = open_workbook_xls(file_contents=data)
df = pd.read_excel(io.BytesIO(data))
print(df)
I got this error message:
XLRDError: Excel xlsx file; not supported