I just need to be sure about the performance: currently I am working with functions that use return, and it takes too much time to display the whole result. The following is an approach using yield:
import os
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage

dirpath = "E:\\Python_Resumes\\"

def getResumeList(dirpath):
    # Lazily yield each PDF file name in the directory
    files = os.listdir(dirpath)
    for file in files:
        if file.endswith(".pdf"):
            yield file

fileObject = getResumeList(dirpath)

def convertToRawText(fileObject):
    # Lazily yield the extracted text of each resume
    for file in fileObject:
        fContent = open(dirpath + file, 'rb')
        rsrcmgr = PDFResourceManager()
        sio = StringIO()
        codec = 'utf-8'
        laparams = LAParams()
        device = TextConverter(rsrcmgr, sio, codec=codec, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page in PDFPage.get_pages(fContent):
            interpreter.process_page(page)
        rawText = sio.getvalue()
        device.close()
        fContent.close()
        yield rawText

result = convertToRawText(fileObject)
for r in result:
    print(r)
    print("\n")
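For context, the practical difference I am asking about is when the work happens: the yield version produces each resume's text as soon as it is extracted, while the return version finishes extracting everything before anything can be printed. A minimal sketch of that behaviour, using a hypothetical slow_extract helper in place of the pdfminer code:

import time

def slow_extract(name):
    # Stand-in for the per-file PDF extraction; sleeps to simulate work
    time.sleep(1)
    return "text of " + name

def lazy_texts(names):
    # Generator version: yields each result as soon as it is ready
    for name in names:
        yield slow_extract(name)

def eager_texts(names):
    # List-building version: returns only after every file is processed
    return [slow_extract(name) for name in names]

names = ["a.pdf", "b.pdf", "c.pdf"]

start = time.perf_counter()
for text in lazy_texts(names):
    print("generator: first result after %.1fs" % (time.perf_counter() - start))
    break  # first item is available after ~1s

start = time.perf_counter()
for text in eager_texts(names):
    print("list: first result after %.1fs" % (time.perf_counter() - start))
    break  # the whole list is built first, so ~3s before anything prints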
And the following is the approach using return:
def getResumeList(dirpath):
    # Collect all PDF file names in the directory into a list
    resumes = []
    files = os.listdir(dirpath)  # Get all the files in that directory
    for file in files:
        if file.endswith(".pdf"):
            resumes.append(file)
    return resumes

listOfFiles = getResumeList(dirpath)

def convertToRawText(files):
    # Extract the text of every resume and return it as one dict
    resumeContent = {}
    for file in files:
        fContent = open(dirpath + file, 'rb')
        rsrcmgr = PDFResourceManager()
        sio = StringIO()
        codec = 'utf-8'
        laparams = LAParams()
        device = TextConverter(rsrcmgr, sio, codec=codec, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page in PDFPage.get_pages(fContent):
            interpreter.process_page(page)
        rawText = sio.getvalue()
        device.close()
        fContent.close()
        resumeContent[file] = rawText
    return resumeContent

bulkResumesText = convertToRawText(listOfFiles)
for b in bulkResumesText:
    print(bulkResumesText[b])
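If it helps, this is roughly how I would try to compare the two (time to the first printed resume versus total time), assuming the two pairs of functions above are renamed so they can live in the same script (the Gen/Ret suffixes below are hypothetical):

import time

# Hypothetical renames:
#   getResumeListGen / convertToRawTextGen -> the yield versions above
#   getResumeListRet / convertToRawTextRet -> the return versions above

start = time.perf_counter()
gen = convertToRawTextGen(getResumeListGen(dirpath))
first = next(gen)  # text of the first resume
print("generator, first result: %.2fs" % (time.perf_counter() - start))
for _ in gen:      # drain the remaining resumes
    pass
print("generator, all results:  %.2fs" % (time.perf_counter() - start))

start = time.perf_counter()
texts = convertToRawTextRet(getResumeListRet(dirpath))
print("list/dict, first result available only now: %.2fs" % (time.perf_counter() - start))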
Which one would be better from a performance and efficiency point of view?