I'm making a pdf 'date checker' in Python which tells me if every page of the pdf has tomorrows date at the top (for checking newspapers as part of my job).
So far so good until I attempted to put it all into a GUI, the buttons display the correct filename, but only open and check the last file in he list the buttons were generated from 'Files[i]'.
Can anybody figure out from my horrible nooby code why this is happening? please excuse the mess (I'm new) :)
Here is my ugly code :) I think the issue is either where I open the file using 'with open(files[i])' or 3rd line from the bottom where the buttons are created. Any help would be greatly appreciated, thank you.
import os, glob
import fileinput
import tkinter as tk
import dateutil
import datetime
from dateutil.relativedelta import *
from dateutil.easter import *
from dateutil.parser import *
from dateutil.rrule import *
import PyPDF2
from PyPDF2 import PdfReader
from datetime import datetime, timedelta
from tkinter import *
folder_path = 'C:users/axlra/documents/datechecker'
for filename in glob.glob(os.path.join(folder_path, '*.pdf')):
with open(files[i], 'r') as f:
text = f.read()
print (files[i])
print (len(text))
def checknow():
tomorrow = (datetime.now() + timedelta(days=1)).strftime("%d-%m-%Y")
file = open(files[i], 'rb')
reader = PdfReader(files[i])
total = len(reader.pages)
for x in range(total+1):
if x > total: file.close()
page = reader.pages[0]
found = False
text = (page.extract_text())
parts = []
def visitor_body(text, cm, tm, fontDict, fontSize):
y = tm[5]
if y > 1600 and y < 10000:
parts.append(text)
page.extract_text(visitor_text=visitor_body)
text_body = "".join(parts)
#print(text_body)
word = text_body
word=word[22:-1]
#print(word)
prodate = parse(word)
str_date = prodate.strftime("%d-%m-%Y")
print(str_date)
print(files[i])
if tomorrow in str_date:
found = True
if found:
#print(x)
print("Tomorrow's date was found on page"+ " "+str(x))
else:
#print(x)
print("Tomorrow's date was NOT found on page"+ " "+str(x))
location = os.getcwd() # get present working directory location here
counter = 0 #keep a count of all files found
files = [] #list to store all pdf files found at location
for file in os.listdir(location):
try:
if file.endswith(".pdf"):
print ("pdf file found:\t", file)
files.append(str(file))
counter = counter
except Exception as e:
raise e
print ("No files found here!")
root = Tk()
btn = [] #creates list to store the buttons ins
for i in range(counter): #this just popultes a list as a replacement for the actual inputs for troubleshooting purposes
files.append(str(i))
for i in range(len(files)): #this says for *counter* in *however many elements there are in the list files*
#the below line creates a button and stores it in an array we can call later, it will print the value of it's own text by referencing itself from the list that the buttons are stored in
btn.append(Button(root, text=files[i], command=checknow))
btn[i].pack() #this packs the buttons
root.mainloop()
Based off the given solutions, this is the working code, the solution was to completely get rid of the 'i list' and just use file_path:
import os
import tkinter as tk
from tkinter import messagebox
import os, glob
import fileinput
import tkinter as tk
import dateutil
import datetime
from dateutil.relativedelta import *
from dateutil.easter import *
from dateutil.parser import *
from dateutil.rrule import *
import PyPDF2
from PyPDF2 import PdfReader
from datetime import datetime, timedelta
from tkinter import *
import re
location = os.getcwd()
counter = 0
files = []
for file in os.listdir(location):
try:
if file.endswith(".pdf"):
print ("pdf file found:\t", file)
files.append(str(file))
counter = counter
except Exception as e:
raise e
print ("No files found here!")
tomorrow = (datetime.now() + timedelta(days=-1)).strftime("%A,%B%e")
tomorrow = tomorrow.replace(" ", "")
tomorrow2 = (datetime.now() + timedelta(days=-1)).strftime("%d.%m.%Y")
tomorrow2 = tomorrow.replace(" ", "")
tomorrow3 = (datetime.now() + timedelta(days=-1)).strftime("%A%e%B%Y")
tomorrow3 = tomorrow.replace(" ", "")
tomorrow4 = (datetime.now() + timedelta(days=-1)).strftime("%A,%B%e")
tomorrow4 = tomorrow.replace(" ", "")
tomorrow5 = (datetime.now() + timedelta(days=-1)).strftime("%A,%e%B")
tomorrow5 = tomorrow.replace(" ", "")
def open_pdf(file_path):
file = open(file_path, 'rb')
reader = PdfReader(file)
total = len(reader.pages)
for x in range(total):
if x > x: file.close()
page = reader.pages[x]
text = (page.extract_text())
text = text.replace(" ", "")
#print(text)
substring = tomorrow
first_index = text.find(substring)
if first_index != -1:
second_index = text.find(substring, first_index + len(substring))
if second_index != -1:
print("Tomorrows date "+ tomorrow+ " appears twice on page"+ " "+str(x).format(substring))
else:
print("Tomorrows date "+ tomorrow+ " appears only once on page"+ " "+str(x)+" -".format(substring))
else:
print("Tomorrows date "+ tomorrow+ " does not appear on page"+ " "+str(x)+" ---".format(substring))
def create_buttons(directory):
for filename in os.listdir(directory):
if filename.endswith(".pdf"):
file_path = os.path.join(directory, filename)
button = tk.Button(root, text=filename, command=lambda f=file_path: open_pdf(f))
button.pack()
root = tk.Tk()
create_buttons(os.getcwd())
root.mainloop()