I have a folder of CSV files (car.csv, person.csv, student.csv, ...), and every file has several columns.
I am trying to read the column values and put them into a list.
This is my function:
import glob
import os
from itertools import chain, combinations

import pandas
def key_options(items):
    """Return an iterator over every non-empty combination of *items*.

    Combinations are produced in order of increasing size (all 1-tuples,
    then all 2-tuples, ...), each as a tuple that keeps the input order.

    Args:
        items: Iterable of values to combine. It is materialized once, so
            one-shot iterables such as generators are accepted too.

    Returns:
        A lazy iterator of tuples. It is empty when *items* is empty.
    """
    # Materialize once: len() is needed below, and combinations() would
    # otherwise exhaust a one-shot iterable on the first size.
    pool = tuple(items)
    return chain.from_iterable(
        combinations(pool, size) for size in range(1, len(pool) + 1)
    )
def primary_key_discovery(data_path):
    """Print candidate primary keys for every CSV file in *data_path*.

    Each ``*.csv`` file directly inside *data_path* is loaded with pandas.
    Every non-empty combination of its columns, excluding the first column,
    is tried as a key: a combination qualifies when dropping duplicate rows
    on those columns leaves the row count unchanged. Each qualifying
    combination is printed as ``<file path> <col1>,<col2>,...``.

    The number of combinations grows exponentially with the column count.

    Args:
        data_path: Directory to search for ``*.csv`` files.

    Returns:
        None. Results are only written to standard output.
    """
    # Sorted so the report order is reproducible; glob returns paths in
    # arbitrary order.
    for csv_path in sorted(glob.glob(os.path.join(data_path, "*.csv"))):
        # pandas is imported under its full name in this file, so the
        # ``pd`` alias is not available.
        frame = pandas.read_csv(csv_path)
        row_count = len(frame.index)
        # The first column is deliberately left out of the candidates --
        # presumably it is a row id; confirm against the data.
        for candidate in key_options(list(frame)[1:]):
            deduped = frame.drop_duplicates(subset=list(candidate))
            if len(deduped.index) == row_count:
                # str() keeps the join working for non-string column labels.
                print(csv_path, ",".join(map(str, candidate)))
if __name__ == "__main__":
    # primary_key_discovery reports through print() and has no return value,
    # so wrapping the call in print() only added a stray "None" line.
    primary_key_discovery('Data1/')
This function gives me output like this: Data1\vehicle.csv model,price,engine-size. That is only the column names; instead, I want to print the values in each column, for example model:camery,altima,aclass,....