I am trying to crop a pdf within and lambda and save the file. Ideally I just want to zoom in as otherwise the OCR package does not recognize some of the fonts. The rectangle I am using just seems to shift the margins versus actually cropping or zooming in.
Thanks!
import os
import json
import boto3
from urllib.parse import unquote_plus
import fitz, sys
from io import BytesIO
OUTPUT_BUCKET_NAME = os.environ["OUTPUT_BUCKET_NAME"]
OUTPUT_S3_PREFIX = os.environ["OUTPUT_S3_PREFIX"]
SNS_TOPIC_ARN = os.environ["SNS_TOPIC_ARN"]
SNS_ROLE_ARN = os.environ["SNS_ROLE_ARN"]
def lambda_handler(event, context):
if event:
file_obj = event["Records"][0]
bucketname = str(file_obj["s3"]["bucket"]["name"])
filename = unquote_plus(str(file_obj["s3"]["object"]["key"]))
doc = fitz.open()
s3 = boto3.resource('s3')
obj = s3.Object(bucketname, filename)
fs = obj.get()['Body'].read()
pdf=fitz.open("pdf", stream=BytesIO(fs))
rect=fitz.Rect(50, 50, 545, 792)
page = pdf[0]
page1 = doc.new_page(width = rect.width, # new page with ...
height = rect.height)
page1.show_pdf_page(rect, pdf, 0)
new_bytes = doc.write()
bucketname1='modified'
s3.Bucket(bucketname1).put_object(Key=filename, Body=new_bytes)