0

Hey guys, can anyone help me debug this code, please? I am stuck...

My goal: to create a loop that checks a folder every 60 seconds to see whether new .json files have been added, then extracts some data from the newly added .json files and exports it to an .xlsx file.

I ran the code and no error comes up, but no output.xlsx is produced either.

Here is the code

import os
from os import listdir
from os.path import isfile, join, splitext
import json
import pandas as pd
import time

def jsonFilesInDirectory(my_dir: str):
    """List the names of the JSON files located directly inside ``my_dir``.

    An entry qualifies when it is a regular file and its extension, compared
    case-insensitively, is ``.json``. Sub-directories are not descended into.

    Args:
        my_dir: Path of the directory to scan.

    Returns:
        A list of bare file names (not full paths), in the order in which
        ``listdir`` reports them.
    """
    json_names = []
    for entry in listdir(my_dir):
        _, extension = splitext(entry)
        if extension.lower() != '.json':
            continue
        # A directory may carry a ".json" suffix too; keep regular files only.
        if not isfile(join(my_dir, entry)):
            continue
        json_names.append(entry)
    return json_names

def listComparison(originalList: list, newList: list):
    """Return the items of ``newList`` that are absent from ``originalList``.

    Order and duplicates of ``newList`` are preserved. Items that exist only
    in ``originalList`` (for example files that were deleted) are not reported.

    Args:
        originalList: The baseline list.
        newList: The list to check against the baseline.

    Returns:
        A new list holding every element of ``newList`` not found in
        ``originalList``.
    """
    added = []
    for candidate in newList:
        if candidate in originalList:
            continue
        added.append(candidate)
    return added

# One entry per audit that is exported:
#   (key under json_data["audits"],
#    label written to the "metric" column,
#    unit suffix stripped from "displayValue", or None to keep it untouched)
# NOTE(review): the first two labels use underscores while the others use
# hyphens; they are kept as-is so rows stay consistent with already-exported data.
_AUDITS = (
    ("first-contentful-paint", "first_contentful_paint", '\xa0s'),
    ("largest-contentful-paint", "largest_contentful_paint", '\xa0s'),
    ("first-meaningful-paint", "first-meaningful-paint", '\xa0s'),
    ("speed-index", "speed-index", '\xa0s'),
    ("total-blocking-time", "total-blocking-time", '\xa0ms'),
    ("cumulative-layout-shift", "cumulative-layout-shift", None),
)


def _metrics_columns(json_data: dict) -> dict:
    """Build the column-oriented data (one row per audit) for one parsed report."""
    audits = json_data["audits"]
    metrics, values, scores = [], [], []
    for audit_key, label, unit in _AUDITS:
        audit = audits[audit_key]
        value = audit["displayValue"]
        if unit is not None:
            # Drop the non-breaking-space + unit suffix so only the number text remains.
            value = value.replace(unit, '')
        metrics.append(label)
        values.append(value)
        scores.append(audit["score"])

    row_count = len(_AUDITS)
    return {
        "fetch_time": [json_data["fetchTime"]] * row_count,
        "url": [json_data["finalUrl"]] * row_count,
        "metric": metrics,
        "value": values,
        "score": scores,
    }


def _append_to_excel(df: pd.DataFrame, excel_file_path: str) -> None:
    """Create the workbook with a header row, or append ``df`` below its existing rows."""
    if not os.path.exists(excel_file_path):
        df.to_excel(excel_file_path, index=False)
        return

    # if_sheet_exists='overlay' (pandas >= 1.4) writes into the existing
    # 'Sheet1'. With the default ('error') pandas raises ValueError as soon as
    # the sheet already exists, so nothing after the first report was appended.
    with pd.ExcelWriter(excel_file_path, engine='openpyxl', mode='a',
                        if_sheet_exists='overlay') as writer:
        df.to_excel(writer, sheet_name='Sheet1', index=False, header=False,
                    startrow=writer.sheets['Sheet1'].max_row)


def doThingsWithNewFiles(fileDiff: list, my_dir: str):
    """Export the audit metrics of each listed JSON report to ``output.xlsx``.

    Every file is parsed as JSON and must provide ``finalUrl``, ``fetchTime``
    and, under ``audits``, a ``displayValue`` and ``score`` for each audit
    listed in ``_AUDITS``. Six rows per file (columns ``fetch_time``, ``url``,
    ``metric``, ``value``, ``score``) are written to ``output.xlsx`` inside
    ``my_dir``; the workbook is created on first use and appended to afterwards.

    Args:
        fileDiff: Names (relative to ``my_dir``) of the JSON files to process.
        my_dir: Directory holding the JSON files and the output workbook.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
        KeyError: If an expected key is missing from a report.
    """
    excel_file_path = os.path.join(my_dir, 'output.xlsx')

    for file_name in fileDiff:
        file_path = os.path.join(my_dir, file_name)
        with open(file_path, 'r', encoding='utf-8') as file:
            json_data = json.load(file)

        df = pd.DataFrame(_metrics_columns(json_data))
        _append_to_excel(df, excel_file_path)

        print(f"DataFrame exported to {excel_file_path}")

def fileWatcher(my_dir: str, pollTime: int, processExisting: bool = False):
    """Poll ``my_dir`` forever and process JSON files that appear in it.

    A baseline listing is taken once at start-up. After every ``pollTime``
    seconds the directory is listed again, and files that were not in the
    previous listing are handed to ``doThingsWithNewFiles``. Deleted files are
    not reported. The function never returns.

    Args:
        my_dir: Directory to watch.
        pollTime: Seconds to sleep between two directory listings.
        processExisting: When True, the JSON files already present at start-up
            are processed once before polling begins. By default (False) they
            only form the baseline and are never exported, so nothing is
            written until a new file shows up.
    """
    # Take the baseline before entering the loop instead of probing locals()
    # on every iteration to detect the first pass.
    previousFileList = jsonFilesInDirectory(my_dir)
    print('First Time')
    print(previousFileList)

    if processExisting and previousFileList:
        doThingsWithNewFiles(previousFileList, my_dir)

    while True:
        time.sleep(pollTime)

        newFileList = jsonFilesInDirectory(my_dir)
        fileDiff = listComparison(previousFileList, newFileList)

        # Update the baseline before processing, as the original did, so a
        # file that fails to process is not picked up again on the next poll.
        previousFileList = newFileList
        if fileDiff:
            doThingsWithNewFiles(fileDiff, my_dir)

# Directory that is watched for new .json files.
my_dir = r"C:\Users\84948\Desktop\EC\Project\Test_Folder"
# Seconds between two scans of the directory.
pollTime = 60

if __name__ == "__main__":
    # Start the watcher only when run as a script, so importing this module
    # (for example to test the helper functions) does not start polling.
    fileWatcher(my_dir, pollTime)

I tried pretty much everything I could. I was expecting an output.xlsx to come out (I have tested each block of code individually and they worked fine, but when they are put together something does not add up).

Nick ODell
  • 15,465
  • 3
  • 32
  • 66
Eric Do
  • 5
  • 3
  • Does the program ever print `DataFrame exported to ...` ? Do you know whether the function `doThingsWithNewFiles()` has been called? – Nick ODell Jun 26 '23 at 17:00
  • The code creates/appends the Excel file for new JSON files only, it'll skip any existing JSON files. – Zero Jun 26 '23 at 17:33
  • @NickODell apparently after this guy Mohit Goyal advise it does now for sure! – Eric Do Jun 27 '23 at 00:52
  • @MohitGoyal you sir are ingenious! – Eric Do Jun 27 '23 at 00:52
  • Happy to help, @EricDo – Zero Jun 27 '23 at 09:50
  • @Zero could you have a look at my updated code? I still have some issue with it :( it works now but not to the full extend I need it to be. Link: https://stackoverflow.com/questions/76684076/file-watcher-loop-improvement-needed-please-help-me-you-kind-people – Eric Do Jul 14 '23 at 00:55

0 Answers0