Hey guys can anyone help me debug this code please? I am stuck...
My goal: to create a loop that checks a folder every 60 seconds for newly added .json files, extracts some data from each new file, and exports it to an .xlsx file.
I ran the code and no error comes up, but no output.xlsx is produced either.
Here is the code
import os
from os import listdir
from os.path import isfile, join, splitext
import json
import pandas as pd
import time
def jsonFilesInDirectory(my_dir: str):
    """Return the names of the regular files in ``my_dir`` that end in ``.json``.

    The extension is compared case-insensitively. Only bare file names are
    returned (not full paths), in the order ``listdir`` yields them, and
    sub-directories are not searched.
    """
    json_names = []
    for entry in listdir(my_dir):
        # Skip directories and anything else that is not a regular file.
        if not isfile(join(my_dir, entry)):
            continue
        extension = splitext(entry)[1]
        if extension.lower() == '.json':
            json_names.append(entry)
    return json_names
def listComparison(originalList: list, newList: list):
    """Return the items of ``newList`` that do not appear in ``originalList``.

    The order (and any duplicates) of ``newList`` is kept. Items present only
    in ``originalList`` -- e.g. files that were deleted between two polls --
    are not reported.
    """
    differencesList = []
    for candidate in newList:
        if candidate in originalList:
            continue
        differencesList.append(candidate)
    return differencesList
# (audit id in the report, label written to the sheet, unit suffix stripped
# from the display value -- None means the value is written unchanged).
# The labels mix "_" and "-" exactly as before so rows that are already in
# output.xlsx keep matching newly appended ones.
_METRIC_SPECS = (
    ("first-contentful-paint", "first_contentful_paint", "\xa0s"),
    ("largest-contentful-paint", "largest_contentful_paint", "\xa0s"),
    ("first-meaningful-paint", "first-meaningful-paint", "\xa0s"),
    ("speed-index", "speed-index", "\xa0s"),
    ("total-blocking-time", "total-blocking-time", "\xa0ms"),
    ("cumulative-layout-shift", "cumulative-layout-shift", None),
)


def _buildMetricFrame(json_data: dict) -> pd.DataFrame:
    """Return one row per entry of ``_METRIC_SPECS`` for a single parsed report.

    Raises ``KeyError`` when ``finalUrl``, ``fetchTime``, an audit, or an
    audit's ``displayValue``/``score`` is missing from ``json_data``.
    """
    url = json_data["finalUrl"]
    fetch_time = json_data["fetchTime"]
    audits = json_data["audits"]

    rows = []
    for audit_id, label, unit in _METRIC_SPECS:
        audit = audits[audit_id]
        value = audit["displayValue"]
        if unit is not None:
            # Display values look like "1.2\xa0s" (non-breaking space + unit).
            value = value.replace(unit, '')
        rows.append({
            "fetch_time": fetch_time,
            "url": url,
            "metric": label,
            "value": value,
            "score": audit["score"],
        })
    return pd.DataFrame(
        rows, columns=["fetch_time", "url", "metric", "value", "score"]
    )


def doThingsWithNewFiles(fileDiff: list, my_dir: str):
    """Append the metrics of each new JSON report to ``<my_dir>/output.xlsx``.

    Args:
        fileDiff: File names (relative to ``my_dir``) of the reports to read.
        my_dir: Folder that holds the reports and receives ``output.xlsx``.

    A report that cannot be opened, is not valid JSON (for instance because
    it is still being written when the watcher sees it), or lacks one of the
    expected keys is reported on stdout and skipped, so one bad file does not
    stop the watcher. Nothing is written when no report yields rows.

    All rows of one call are written in a single pass. When ``output.xlsx``
    already exists the rows are appended below the existing content of
    ``Sheet1``; otherwise the file is created with a header row.
    """
    frames = []
    for file_name in fileDiff:
        file_path = os.path.join(my_dir, file_name)
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                json_data = json.load(file)
            frames.append(_buildMetricFrame(json_data))
        except (OSError, ValueError, KeyError, TypeError) as error:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Skipping {file_path}: {error!r}")

    if not frames:
        return

    df = pd.concat(frames, ignore_index=True)

    # Export DataFrame to Excel
    excel_file_path = os.path.join(my_dir, 'output.xlsx')
    if os.path.exists(excel_file_path):
        # Without if_sheet_exists='overlay' (pandas >= 1.4) append mode
        # refuses to write into the already existing 'Sheet1' and raises
        # ValueError on every export after the first one.
        with pd.ExcelWriter(excel_file_path, engine='openpyxl', mode='a',
                            if_sheet_exists='overlay') as writer:
            # max_row is the 1-based index of the last used row, which equals
            # the 0-based index of the first free row.
            df.to_excel(writer, sheet_name='Sheet1', index=False, header=False,
                        startrow=writer.sheets['Sheet1'].max_row)
    else:
        df.to_excel(excel_file_path, index=False)
    print(f"DataFrame exported to {excel_file_path}")
def fileWatcher(my_dir: str, pollTime: int, processExisting: bool = False):
    """Poll ``my_dir`` forever and export every newly added JSON file.

    Args:
        my_dir: Folder to watch.
        pollTime: Seconds to sleep between two directory scans.
        processExisting: When true, the JSON files already present at
            start-up are exported once before polling begins. With the
            default (false) they only form the baseline, so nothing is
            exported until a file is added while the watcher is running.

    The function never returns. Each scan is compared with the previous one;
    files that appeared are passed to ``doThingsWithNewFiles``. Deleted files
    are simply dropped from the baseline, so a file that is removed and later
    re-added is treated as new again.
    """
    # Take the baseline once, before the loop, instead of probing locals()
    # on every iteration to detect the first pass.
    previousFileList = jsonFilesInDirectory(my_dir)
    print('First Time')
    print(previousFileList)
    if processExisting and previousFileList:
        doThingsWithNewFiles(previousFileList, my_dir)

    while True:
        time.sleep(pollTime)
        newFileList = jsonFilesInDirectory(my_dir)
        fileDiff = listComparison(previousFileList, newFileList)
        previousFileList = newFileList
        if fileDiff:
            doThingsWithNewFiles(fileDiff, my_dir)
# Folder to watch and polling interval in seconds.
my_dir = r"C:\Users\84948\Desktop\EC\Project\Test_Folder"
pollTime = 60

# Start the endless watch loop only when run as a script, so importing this
# module (e.g. to reuse or test the helpers) does not block forever.
if __name__ == "__main__":
    fileWatcher(my_dir, pollTime)
I have tried pretty much everything I could. I was expecting an output.xlsx to be produced. I have tested each block of code individually and they worked fine, but when they are put together something does not add up.