I'm editing a PDF with PyMuPDF by redacting certain words and writing different words on top of the redacted areas.
The code works; however, it produces a very large single-page PDF (9 MB). I assume this is because it draws many shapes and applies many redactions, but I can't work out how to refactor it.
I know from this post that I shouldn't call page.apply_redactions() more than once, but if I call it only once, either the text doesn't display correctly on top of the redacted rectangle or it raises "ValueError: fill rect must be finite and not empty".
Any help in refactoring for a smaller output pdf would be much appreciated.
# Fragment of a larger method (it reads self.path): for each page, find the
# strings returned by get_sensitive_data, add a redaction annotation for them,
# apply the redactions, then write replacement text with a TextWriter.
# NOTE(review): the original indentation was lost in this paste, so the loop
# nesting of the statements below cannot be determined from the text alone.
# NOTE(review): column, fontsize, align, new_area, variable, color and font
# are not assigned anywhere in this fragment - presumably set in omitted code.
doc = fitz.open(self.path)
# get pdf background colour
# NOTE(review): col is reassigned to "white" further down before it is read.
col = fitz.utils.getColor("py_color")
# iterating through pages
for page in doc:
page.wrap_contents()
# getting the rect boxes which contain the regex matches: the page text is
# split into lines and passed to get_sensitive_data
sensitive = self.get_sensitive_data(page.getText("text")
.split('\n'))
for data in sensitive:
# every rectangle on the page where this string is found
areas = page.searchFor(data)
for area in areas:
# NOTE(review): text_page is assigned twice here but not read again in this
# fragment.
text_page = page.get_textpage(clip=area)
text_page = text_page.extractDICT(area)
# text_page = area
# text length of the longest entry in column (by len) at this font size,
# plus a constant 14 (presumably padding - confirm)
max_length = fitz.getTextlength(str(max(column, key=len)), fontsize=fontsize)+14
area = format_border(page, area, data, fontsize, align=align, max_length=max_length)
# replace the bottom edge with the helper's result; the helper is given the
# current y1 and the current rect height (y1 - y0)
area.y1 = add_yrect_line(column, area.y1, area.y1-area.y0)
col = fitz.utils.getColor("white")
# NOTE(review): the annotation is added on new_area, not on the area rect
# adjusted above; the returned annotation is not used afterwards.
redaction = page.addRedactAnnot(new_area, fill=col, text=" ") #flags not available
# NOTE(review): if this call sits inside the per-match loop, redactions are
# applied once per match; the author suspects repeated application is what
# inflates the output file - confirm against the real indentation.
page.apply_redactions() # page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE) to circumvent transparent image issues
# writer covering the whole page rectangle; the replacement text is laid out
# inside new_area and then written onto the page
writer = fitz.TextWriter(page.rect, color=color)
# align to top of box if align right:
writer.fill_textbox(new_area, variable, fontsize=fontsize, warn=True, align=align, font=font)
writer.write_text(page)
# To show what happened, draw the rectangles, etc.
# NOTE(review): both shapes below are finished with stroke_opacity=0 and no
# colour arguments, yet they are still committed to the page.
shape = page.newShape()
shape.drawRect(new_area) # the rect within which we had to stay
shape.finish(stroke_opacity=0) # no colour is passed here; stroke opacity is set to 0
shape.commit()
shape = page.newShape()
shape.drawRect(writer.text_rect) # the generated TextWriter rectangle
shape.drawCircle(writer.last_point, 2) # coordinates of end of text
shape.finish(stroke_opacity=0) # no colour is passed here; stroke opacity is set to 0
shape.commit()
# NOTE(review): this new writer (bounded by area instead of page.rect) is not
# used within the visible fragment.
writer = fitz.TextWriter(area, color=color)