0

I have a program that displays the PDF as an image in canvas, and the user should be able to make selections in screen, and the pixel of the selection along with the character in the selected box should be displayed in a text box UI in Tkinter. but currently am not able to see my PDF on screen. and when i select something it throws error AttributeError: 'TextPage' object has no attribute 'get_text'

So when i run it shows PDF width :612.0 PDF height :792.0 Canvas width : 1 Canvas height :1 Scale factor: 0.0012626262626262627 Since am new to this Can anyone please help

import tkinter as tk
import fitz
from PIL import Image, ImageTk
import os


class PDFViewer:
    def __init__(self, pdf_path):
        self.pdf_doc = fitz.open(pdf_path)
        self.current_page = 0

        # Create a Tkinter window and canvas
        self.root = tk.Tk()
        self.canvas = tk.Canvas(self.root, width=1000, height=1000)
        self.canvas.pack(expand=True, fill='both')

        # Load the first page of the PDF
        self.load_page()

        # Bind mouse events to the canvas
        self.canvas.bind("<Button-1>", self.start_selection)
        self.canvas.bind("<B1-Motion>", self.update_selection)
        self.canvas.bind("<ButtonRelease-1>", self.end_selection)
        # Start the Tkinter event loop
        self.root.mainloop()

    def load_page(self):
        # Render the current page of the PDF as an image and display it on the canvas
        page = self.pdf_doc[self.current_page]
        print(f"Loading page {self.current_page}")
        print(page)

        # Get the dimensions of the PDF page and the canvas
        page_width, page_height = page.mediabox.width, page.mediabox.height
        print(page_width,page_height)
        canvas_width, canvas_height = self.canvas.winfo_width(), self.canvas.winfo_height()
        print(canvas_width,canvas_height)

        # Calculate the scaling factor to fit the PDF page within the canvas
        scale_factor = min(canvas_width / page_width, canvas_height / page_height)
        print(f"Scale factor: {scale_factor}")

        # Scale the PDF page to fit within the canvas
        pix = page.get_pixmap(matrix=fitz.Matrix(scale_factor, scale_factor))
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        self.image = ImageTk.PhotoImage(img)
        # self.canvas.create_image(0, 0, image=self.image, anchor=tk.NW)
        self.canvas.create_image(canvas_width / 2, canvas_height / 2, image=self.image, anchor=tk.CENTER)
        # Force the canvas to redraw itself
        self.canvas.update()

    def start_selection(self, event):
        # Record the starting position of the selection
        self.start_x = self.canvas.canvasx(event.x)
        self.start_y = self.canvas.canvasy(event.y)

        # Create a selection rectangle on the canvas
        self.selection_rect = self.canvas.create_rectangle(self.start_x, self.start_y, self.start_x, self.start_y, outline="red")

    def update_selection(self, event):
        # Update the position of the selection rectangle
        x = self.canvas.canvasx(event.x)
        y = self.canvas.canvasy(event.y)
        self.canvas.coords(self.selection_rect, self.start_x, self.start_y, x, y)

    def end_selection(self, event):
        if self.selection_rect:
            # Get the coordinates of the selection rectangle
            coords = self.canvas.coords(self.selection_rect)
            x1, y1, x2, y2 = coords

            # Convert canvas coordinates to PDF page coordinates
            p1 = fitz.Point(self.canvas.canvasx(x1), self.canvas.canvasy(y1))
            p2 = fitz.Point(self.canvas.canvasx(x2), self.canvas.canvasy(y2))

            # Create a rectangle from the converted coordinates
            rect = fitz.Rect(p1, p2)

            # Get the text content from the selected region
            page = self.pdf_doc[self.current_page]
            text_page = page.get_textpage()
            selected_blocks = text_page.get_text("text", clip=rect)
            selected_text = "\n".join(block[0] for block in selected_blocks)

            # Print the selected text in a text box
            text_box = tk.Text(self.root)
            text_box.insert("1.0", selected_text)
            text_box.pack()

            # Destroy the selection rectangle and force the canvas to redraw itself
            self.canvas.delete(self.selection_rect)
            self.selection_rect = None
            self.canvas.update()

if __name__ == '__main__':
    pdf_path = "Invoice.pdf"
    pdf_abs_path = os.path.abspath(pdf_path)
    viewer = PDFViewer(pdf_abs_path)`
bay bay
  • 11
  • 1
  • If you look into the console output clearly, you will find that the reported canvas size is 1x1, so the scale factor is a very small float number. It produces a 1x1 image so you cannot see it. – acw1668 Feb 20 '23 at 10:18
  • but in code i gave it as 1000 x 1000 self.canvas = tk.Canvas(self.root, width=1000, height=1000) @acw1668 – bay bay Feb 20 '23 at 10:24
  • The actual size will be effective if you call `self.canvas.update()` after the canvas is created. Use `.winfo_reqwidth()` and `.winfo_reqheight()` instead. – acw1668 Feb 20 '23 at 10:35
  • Does the answer help? https://stackoverflow.com/questions/67112724/fitz-open-not-working-when-in-a-for-loop-fitz-python-pymupdf Look @Hassan Shahzad. – toyota Supra Feb 20 '23 at 12:09

0 Answers0