I know it's bad to just throw code here and ask for help troubleshooting it, but this problem seems a little over my head.
The code is supposed to loop through all the files and subfolders. I don't think there is any logic error here. The problem is that the same file appears to be processed again, which causes the DB insert to fail on a primary key constraint.
This is my code:
import csv
import pypyodbc
import os
import sys
# Suffix a file path must end with for LoadFile to load it.
extension = '.tsv'
# ODBC connection string passed to pypyodbc.connect() further down.
connStr = 'DSN=database_test;'
# From here on, everything printed goes to this log file instead of the console.
sys.stdout = open(r'c:\temp\python.log', 'w')
print('starting ...')
def _table_for(full_path):
    """Return the destination table name for the file at *full_path*."""
    # NOTE(review): this tests the whole path (folder names included) and
    # ignores a match at index 0, exactly like the original check. If only
    # the file name was meant to decide the table, confirm and tighten it.
    return 'table_a' if full_path.find('IM') > 0 else 'table_b'


def _load_tsv(full_path, table_name):
    """Insert the data rows of one tab-separated file into *table_name*.

    The first line of the file supplies the column names. Rows that have
    fewer than two fields, or whose second field is empty, are skipped.
    All rows of the file are committed together; if any insert fails the
    file's inserts are rolled back and the exception is re-raised, so a
    failed run does not leave half of a file behind to collide with on the
    next run.
    """
    with open(full_path, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        columns = next(reader)
        # Built once per file: the statement is identical for every row.
        # NOTE(review): column names come straight from the file header and
        # are spliced into the SQL text; only the values are parameterized.
        query = 'insert into {0}({1}) values ({2})'.format(
            table_name, ', '.join(columns), ', '.join('?' * len(columns)))
        crsr = cnxn.cursor()
        try:
            for data in reader:
                # Blank lines come back from csv.reader as [] and would
                # otherwise raise IndexError on data[1].
                if len(data) > 1 and data[1] != '':
                    crsr.execute(query, data)
            cnxn.commit()
        except Exception:
            cnxn.rollback()
            raise
        finally:
            crsr.close()


def LoadFile(path):
    """Recursively load every non-empty ``.tsv`` file under *path*.

    Each directory entry is handled as follows:

    * a regular file whose path ends with the module-level ``extension``
      is loaded through the module-level connection ``cnxn`` (zero-byte
      files are silently skipped);
    * a directory is processed by a recursive call;
    * anything else is reported as an invalid file name.

    Progress is reported with ``print``; a per-folder summary line is
    printed after all entries of *path* have been handled.

    Raises whatever ``os.listdir``, ``open`` or the database driver raise;
    nothing is swallowed.
    """
    processed = 0
    for item in os.listdir(path):
        full_path = os.path.join(path, item)
        if os.path.isfile(full_path) and full_path.endswith(extension):
            if os.stat(full_path).st_size > 0:
                print("Processing file: %s | %s" % (processed, full_path))
                processed += 1
                _load_tsv(full_path, _table_for(full_path))
        elif os.path.isdir(full_path):
            print("Process Folder:  %s" % full_path)
            LoadFile(full_path)
        else:
            print("invalid file name: %s" % item)
    # Report the folder itself: the loop variable is unbound for an empty
    # directory and otherwise only names the last entry that was listed.
    print("Process Folder total files:  %s : %s" % (processed, path))
# One connection is opened for the whole run; LoadFile reaches it through
# the module-level name ``cnxn``.
cnxn = pypyodbc.connect(connStr)
dir_name = 'X:\\TopLevelFolder'
try:
    LoadFile(dir_name)
finally:
    # Release the connection even when loading a file fails part-way.
    cnxn.close()
print("Completed")