I have a program that displays the PDF as an image in canvas, and the user should be able to make selections in screen, and the pixel of the selection along with the character in the selected box should be displayed in a text box UI in Tkinter. but currently am not able to see my PDF on screen. and when i select something it throws error AttributeError: 'TextPage' object has no attribute 'get_text'
So when i run it shows PDF width :612.0 PDF height :792.0 Canvas width : 1 Canvas height :1 Scale factor: 0.0012626262626262627 Since am new to this Can anyone please help
import tkinter as tk
import fitz
from PIL import Image, ImageTk
import os
class PDFViewer:
def __init__(self, pdf_path):
self.pdf_doc = fitz.open(pdf_path)
self.current_page = 0
# Create a Tkinter window and canvas
self.root = tk.Tk()
self.canvas = tk.Canvas(self.root, width=1000, height=1000)
self.canvas.pack(expand=True, fill='both')
# Load the first page of the PDF
self.load_page()
# Bind mouse events to the canvas
self.canvas.bind("<Button-1>", self.start_selection)
self.canvas.bind("<B1-Motion>", self.update_selection)
self.canvas.bind("<ButtonRelease-1>", self.end_selection)
# Start the Tkinter event loop
self.root.mainloop()
def load_page(self):
# Render the current page of the PDF as an image and display it on the canvas
page = self.pdf_doc[self.current_page]
print(f"Loading page {self.current_page}")
print(page)
# Get the dimensions of the PDF page and the canvas
page_width, page_height = page.mediabox.width, page.mediabox.height
print(page_width,page_height)
canvas_width, canvas_height = self.canvas.winfo_width(), self.canvas.winfo_height()
print(canvas_width,canvas_height)
# Calculate the scaling factor to fit the PDF page within the canvas
scale_factor = min(canvas_width / page_width, canvas_height / page_height)
print(f"Scale factor: {scale_factor}")
# Scale the PDF page to fit within the canvas
pix = page.get_pixmap(matrix=fitz.Matrix(scale_factor, scale_factor))
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
self.image = ImageTk.PhotoImage(img)
# self.canvas.create_image(0, 0, image=self.image, anchor=tk.NW)
self.canvas.create_image(canvas_width / 2, canvas_height / 2, image=self.image, anchor=tk.CENTER)
# Force the canvas to redraw itself
self.canvas.update()
def start_selection(self, event):
# Record the starting position of the selection
self.start_x = self.canvas.canvasx(event.x)
self.start_y = self.canvas.canvasy(event.y)
# Create a selection rectangle on the canvas
self.selection_rect = self.canvas.create_rectangle(self.start_x, self.start_y, self.start_x, self.start_y, outline="red")
def update_selection(self, event):
# Update the position of the selection rectangle
x = self.canvas.canvasx(event.x)
y = self.canvas.canvasy(event.y)
self.canvas.coords(self.selection_rect, self.start_x, self.start_y, x, y)
def end_selection(self, event):
if self.selection_rect:
# Get the coordinates of the selection rectangle
coords = self.canvas.coords(self.selection_rect)
x1, y1, x2, y2 = coords
# Convert canvas coordinates to PDF page coordinates
p1 = fitz.Point(self.canvas.canvasx(x1), self.canvas.canvasy(y1))
p2 = fitz.Point(self.canvas.canvasx(x2), self.canvas.canvasy(y2))
# Create a rectangle from the converted coordinates
rect = fitz.Rect(p1, p2)
# Get the text content from the selected region
page = self.pdf_doc[self.current_page]
text_page = page.get_textpage()
selected_blocks = text_page.get_text("text", clip=rect)
selected_text = "\n".join(block[0] for block in selected_blocks)
# Print the selected text in a text box
text_box = tk.Text(self.root)
text_box.insert("1.0", selected_text)
text_box.pack()
# Destroy the selection rectangle and force the canvas to redraw itself
self.canvas.delete(self.selection_rect)
self.selection_rect = None
self.canvas.update()
if __name__ == '__main__':
pdf_path = "Invoice.pdf"
pdf_abs_path = os.path.abspath(pdf_path)
viewer = PDFViewer(pdf_abs_path)`