I am creating a text-free PDF file from a PDF file that contains text, using the following function:
def remove_text_from_pdf(pdf_path_in, pdf_path_out):
    """Write a copy of the first page of a PDF with its text removed.

    Only page 0 of the input is copied to the output; any further pages
    are not carried over.

    Args:
        pdf_path_in: Path of the source PDF (diagram plus text).
        pdf_path_out: Path the text-free PDF is written to.
    """
    # The reader parses lazily from the open stream, so the input file has to
    # stay open until the writer has finished writing.
    with open(pdf_path_in, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # The writer starts out empty; it is not given the input stream.
        pdf_writer = PyPDF2.PdfWriter()

        # Copy the first page into the writer, then strip the text from
        # every page the writer holds.
        pdf_writer.add_page(pdf_reader.pages[0])
        pdf_writer.remove_text()

        # Closing the output via the context manager flushes it to disk
        # before any caller re-opens the file.
        with open(pdf_path_out, 'wb') as out_file:
            pdf_writer.write(out_file)
I am then converting the output to a PNG file using the following function:
def convert_pdf_to_png(pdf_path, png_path):
    """Render the first page of a PDF file and save it as a PNG image.

    Args:
        pdf_path: Path of the PDF file to render.
        png_path: Path the rendered image is written to.
    """
    # The context manager closes the document even if rendering or saving
    # raises, so the file handle is never leaked.
    with fitz.open(pdf_path) as doc:
        first_page = doc.load_page(0)  # zero-based page index
        pix = first_page.get_pixmap()
        pix.save(png_path)
However, this gives me a PNG file that is just a blank white page.
I was expecting a PDF file that is not blank.