I am trying to loop through my subdirectories to read in my zip files. I am getting error TypeError: 'WindowsPath' object is not iterable
What i am trying:
path = Path("O:/Stack/Over/Flow/")
for p in path.rglob("*"):
print(p.name)
zip_files = (str(x) for x in Path(p.name).glob("*.zip"))
df = process_files(p) #function
What does work - when I go to the folder directly with my path:
path = r'O:/Stack/Over/Flow/2022 - 10/'
zip_files = (str(x) for x in Path(path).glob("*.zip"))
df = process_files(zip_files)
any help would be appreciated.
Directory structure is like:
//Stack/Over/Flow/2022 - 10/Original.zip
//Stack/Over/Flow/2022 - 09/Next file.zip
function i call:
from io import BytesIO
from pathlib import Path
from zipfile import ZipFile
import os
import pandas as pd
def process_files(files: list) -> pd.DataFrame:
file_mapping = {}
for file in files:
#data_mapping = pd.read_excel(BytesIO(ZipFile(file).read(Path(file).stem)), sheet_name=None)
archive = ZipFile(file)
# find file names in the archive which end in `.xls`, `.xlsx`, `.xlsb`, ...
files_in_archive = archive.namelist()
excel_files_in_archive = [
f for f in files_in_archive if Path(f).suffix[:4] == ".xls"
]
# ensure we only have one file (otherwise, loop or choose one somehow)
assert len(excel_files_in_archive) == 1
# read in data
data_mapping = pd.read_excel(
BytesIO(archive.read(excel_files_in_archive[0])),
sheet_name=None,
)
row_counts = []
for sheet in list(data_mapping.keys()):
row_counts.append(len(data_mapping.get(sheet)))
file_mapping.update({file: sum(row_counts)})
frame = pd.DataFrame([file_mapping]).transpose().reset_index()
frame.columns = ["file_name", "row_counts"]
return frame
New : what I am trying
for root, dirs, files in os.walk(dir_path):
for file in files:
print(files)
if file.endswith('.zip'):
df = process_files(os.path.join(root, file))
print(df) #function
else:
print("nyeh")
This is returning files like Original - All fields - 11012021 - 11302021.zip
but then i get an error OSError: [Errno 22] Invalid argument: '\\'