I am using `np.genfromtxt` to read a CSV file, and I am trying to use the `converters` argument to preprocess each column.
CSV:
"","Col1","Col2","Col3"
"1","Cell.1",NA,1
"2","Cell.2",NA,NA
"3","Cell.3",1,NA
"4","Cell.4",NA,NA
"5","Cell.5",NA,NA
"6","Cell.6",1,NA
Code:
import numpy as np
# Path of the CSV file to load.
filename = "b.csv"
# Header fields of the CSV; the first entry is the empty name of the
# leading row-number column.
h = ("", "Col1", "Col2", "Col3")
def col1_converter(v):
    """Print the raw value received for column 1 and return it unchanged.

    Used as a tracing converter: it performs no conversion, it only shows
    which values are handed to the converter and in what order.
    """
    print(f'col1_converter {v = }')
    return v
def col2_converter(v):
    """Print the raw value received for column 2 and return it unchanged.

    Used as a tracing converter: it performs no conversion, it only shows
    which values are handed to the converter and in what order.
    """
    print(f'col2_converter {v = }')
    return v
def col3_converter(v):
    """Print the raw value received for column 3 and return it unchanged.

    Used as a tracing converter: it performs no conversion, it only shows
    which values are handed to the converter and in what order.
    """
    print(f'col3_converter {v = }')
    return v
# Read columns 1..3 of the CSV (column 0, the row number, is left out via
# `usecols`) and route each selected column through its tracing converter.
a = np.genfromtxt(
    filename,
    delimiter=',',
    names=True,  # field names come from the file's first line
    deletechars='',  # pass an empty deletechars string for field-name cleanup
    usecols=range(1, len(h)),
    dtype=[None, np.dtype('U8'), np.dtype('U2'), np.dtype('U2')],
    converters={1: col1_converter, 2: col2_converter, 3: col3_converter},
)
print()  # blank line between the converter trace and the resulting array
print(a)
When I put print statements in the converters, I see an extraneous row of 1's printed at the beginning, which doesn't actually appear in the array that is output. Why am I seeing this row of 1's?
col1_converter v = b'1'
col2_converter v = b'1'
col3_converter v = b'1'
col1_converter v = b'"Cell.1"'
col1_converter v = b'"Cell.2"'
col1_converter v = b'"Cell.3"'
col1_converter v = b'"Cell.4"'
col1_converter v = b'"Cell.5"'
col1_converter v = b'"Cell.6"'
col2_converter v = b'NA'
col2_converter v = b'NA'
col2_converter v = b'1'
col2_converter v = b'NA'
col2_converter v = b'NA'
col2_converter v = b'1'
col3_converter v = b'1'
col3_converter v = b'NA'
col3_converter v = b'NA'
col3_converter v = b'NA'
col3_converter v = b'NA'
col3_converter v = b'NA'
[('"Cell.1"', 'NA', '1') ('"Cell.2"', 'NA', 'NA') ('"Cell.3"', '1', 'NA')
('"Cell.4"', 'NA', 'NA') ('"Cell.5"', 'NA', 'NA') ('"Cell.6"', '1', 'NA')]