0

enter image description here

import fitz  # PyMuPDF library

# Load the PDF document
pdf_path = "./FAQ.pdf"
pdf_document = fitz.open(pdf_path)

# Initialize a dictionary to store text and hyperlink pairs
text_with_links = {}

# Iterate through the pages of the PDF document
for page_num in range(pdf_document.page_count):
    page = pdf_document.load_page(page_num)
    
    # Extract text annotations from the page
    text = page.get_text("text")
    links = page.get_links()

    for link in links:
        if "uri" in link:
            if text not in text_with_links:
                text_with_links[text] = []
            text_with_links[text].append(link["uri"])

# Create a new PDF with updated links next to text
new_pdf_path = "document.pdf"
new_pdf_document = fitz.open()
for text, links in text_with_links.items():
    text_with_links_text = f"{text}\n\nLinks: {', '.join(links)}"
    page = new_pdf_document.new_page()
    page.insert_text((100, 100), text_with_links_text)

# Save the new PDF document
new_pdf_document.save(new_pdf_path)
new_pdf_document.close()

# Close the original PDF document
pdf_document.close()

print("New PDF with updated links has been created.")

What I want is Get in touch/("http://get_intouch.com/)Feedback ("http//feedback.com"/) the hyperlinks to be extracted and populated next to the word which has link , why I need this is because I am doing it as a hack to pass it to my model , please help me my code sucks to add link and whole pdf is distorted

nithin
  • 753
  • 3
  • 7
  • 21

0 Answers0