Applying wand to all images in a directory

Question

I am trying to convert all the PDFs in a directory into images, but I have an issue: only one PDF gets converted, not all of them.

import matplotlib
import pytesseract
import os
import argparse
import cv2
from PIL import Image
import PyPDF2
from wand.image import Image as wi

# Walk every entry returned by os.listdir(src_path) and, for each entry that
# PyPDF2 can open as a PDF, render its pages to JPEG files with wand.
# NOTE(review): src_path, args and count are not defined in this snippet —
# presumably they are set up earlier in the script; confirm.
for filename in os.listdir(src_path):
    count = count + 1

    # apply tesseract OCR and write to text file in specified target directory
    # NOTE(review): target_path is assigned here but never used in the code
    # shown below.
    target_path = args.trg_dir

    # check if a file is a directory
    # NOTE(review): filename is a bare entry name from os.listdir, so this
    # check is resolved against the current working directory, not src_path.
    if os.path.isdir(filename):
        pass
    else:
        # check if a file is a pdf
        # NOTE(review): the file object opened here is never closed.
        try:
            PyPDF2.PdfFileReader(open(os.path.join(src_path, filename), "rb"))
        except PyPDF2.utils.PdfReadError:
        # NOTE(review): the except clause above has no body, which is a
        # syntax error as posted — a statement was presumably lost when
        # the snippet was pasted.
        else:
            # NOTE(review): unlike the PyPDF2 check above, the path passed to
            # wand is the bare filename, not joined with src_path.
            pdf = wi(filename=filename, resolution=300)
            pdfimage = pdf.convert("jpeg")
            # The page counter restarts at 1 for every PDF, and the output
            # name below depends only on that counter, so each PDF writes to
            # the same 1.jpg, 2.jpg, ... and overwrites the previous PDF's
            # pages.
            i=1
            for img in pdfimage.sequence:
                page = wi(image=img)
                page.save(filename=str(i)+".jpg")
                i +=1

score 0 · Answer 1 · answered Sep 21 '21 at 12:59

0

IIUC, try:

# Convert every PDF found directly inside src_path into one JPEG per page.
# The extension is matched case-insensitively so names like "SCAN.PDF" are
# picked up as well.
files = [file for file in os.listdir(src_path) if file.lower().endswith(".pdf")]
for file in files:
    # os.listdir returns bare entry names, so join with src_path before
    # opening. The path must be given through the filename= keyword: the first
    # positional parameter of wand's Image is an existing image object.
    with wi(filename=os.path.join(src_path, file), resolution=300) as img_pdf:
        for page, img in enumerate(img_pdf.sequence):
            # Each output name carries the PDF's own name plus the page
            # index, so pages from different PDFs never overwrite each other.
            # save() also needs filename=, because its first positional
            # parameter is a writable file object, not a path.
            with wi(image=img) as page_img:
                page_img.save(filename=f"{file}_{page}.jpg")

answered Sep 21 '21 at 12:59

not_speshal

22,093
2
15
30

Applying wand to all images in a directory

1 Answer