0

I am trying to create a range of page numbers from a pdf file and then split and save them as a separate file. Below is the code written for it.

import os
from PyPDF2 import PdfFileReader, PdfFileWriter
import re

def pdf_splitter(pdf_path):    

    directory = "C:\\Users\\Docs\\"
    fname = os.path.splitext(os.path.basename(pdf_path))[0]
    print(fname)
    object = PdfFileReader(pdf_path)
    NumPages = object.getNumPages()
    print(NumPages)
    string = "Challenge 1:"
    string2 = "Challenge 2:"   
    res=0
    pageList=[]                
    for txt in range(0,NumPages):
        pdf_writer = PdfFileWriter()
        pageObject = object.getPage(txt)
        Text =  pageObject.extractText()
        print(Text)                     
        acc_pos = Text.find(string)
        print(acc_pos)            
        Cur_pos = Text.find(string2)
        print(Cur_pos)
        loanAcctName = Text[acc_pos+12:Cur_pos]
        print (loanAcctName)
        # pageList.append(txt)
        # print(pageList)        

        ReSearch = re.search(string, Text)
        if ReSearch != None:
            pageList.append(txt)
         elif ReSearch ==None:
             pageList.append(txt)
      print(pageList)   
     res = res + 1                
             pdf_writer.addPage(object.getPage(page_num))
             output_filename = '{}_page_{}.pdf'.format(loanAcctName,page + 1)                    
                                                                  
             with open(output_filename, 'wb') as out:
                pdf_writer.write(out)
                print('Created: {}'.format(output_filename))
                out.close()
                res = res + 1
            
if __name__ == '__main__':
    pdf_path = r"C:\Users\FY22.pdf"
    pdf_splitter(pdf_path)
Vadim Kotov
  • 8,084
  • 8
  • 48
  • 62
Amit Sharma
  • 53
  • 1
  • 7

0 Answers0