0

I am trying to load a huge CSV file using the script below, but I am getting an error:

# Column names to assign to the CSV, which is read with header=None (no header row).
header = ["SKU","STORAGE_AREA","MOVE_TYPE","ORDER_NO","ORDER_ITEM","PICK_VOL","M_UNIT","DATE"] 
# Per-column dtypes handed to read_csv.
# NOTE(review): "datetime" is not a dtype string that pandas/NumPy understands; this entry
# is what raises "TypeError: data type 'datetime' not understood" in the traceback below.
d_type = {"SKU":"str","STORAGE_AREA":"str","MOVE_TYPE":"str","ORDER_NO":"category","ORDER_ITEM":"str","PICK_VOL":"int","M_UNIT":"str","DATE":"datetime"} 
# Read the comma-separated, latin-1 encoded file using the names and dtypes above.
product = pd.read_csv('pick_data.csv', encoding='latin-1', sep=',', index_col=False, header=None, names=header, dtype=d_type)

error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [68], in <cell line: 3>()
      1 header = ["SKU","STORAGE_AREA","MOVE_TYPE","ORDER_NO","ORDER_ITEM","PICK_VOL","M_UNIT","DATE"] 
      2 d_type = {"SKU":"str","STORAGE_AREA":"str","MOVE_TYPE":"str","ORDER_NO":"category","ORDER_ITEM":"str","PICK_VOL":"int","M_UNIT":"str","DATE":"datetime"} 
----> 3 product = pd.read_csv('pick_data.csv', encoding='latin-1', sep=',', index_col=False, header=None, names=header, dtype=d_type)

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    305 if len(args) > num_allow_args:
    306     warnings.warn(
    307         msg.format(arguments=arguments),
    308         FutureWarning,
    309         stacklevel=stacklevel,
    310     )
--> 311 return func(*args, **kwargs)

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py:680, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    665 kwds_defaults = _refine_defaults_read(
    666     dialect,
    667     delimiter,
   (...)
    676     defaults={"delimiter": ","},
    677 )
    678 kwds.update(kwds_defaults)
--> 680 return _read(filepath_or_buffer, kwds)

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py:575, in _read(filepath_or_buffer, kwds)
    572 _validate_names(kwds.get("names", None))
    574 # Create the parser.
--> 575 parser = TextFileReader(filepath_or_buffer, **kwds)
    577 if chunksize or iterator:
    578     return parser

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py:933, in TextFileReader.__init__(self, f, engine, **kwds)
    930     self.options["has_index_names"] = kwds["has_index_names"]
    932 self.handles: IOHandles | None = None
--> 933 self._engine = self._make_engine(f, self.engine)

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py:1235, in TextFileReader._make_engine(self, f, engine)
   1232     raise ValueError(msg)
   1234 try:
-> 1235     return mapping[engine](f, **self.options)
   1236 except Exception:
   1237     if self.handles is not None:

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py:74, in CParserWrapper.__init__(self, src, **kwds)
     64 for key in (
     65     "storage_options",
     66     "encoding",
   (...)
     70     "warn_bad_lines",
     71 ):
     72     kwds.pop(key, None)
---> 74 kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
     75 self._reader = parsers.TextReader(src, **kwds)
     77 self.unnamed_cols = self._reader.unnamed_cols

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py:416, in ensure_dtype_objs(dtype)
    411 """
    412 Ensure we have either None, a dtype object, or a dictionary mapping to
    413 dtype objects.
    414 """
    415 if isinstance(dtype, dict):
--> 416     return {k: pandas_dtype(dtype[k]) for k in dtype}
    417 elif dtype is not None:
    418     return pandas_dtype(dtype)

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py:416, in <dictcomp>(.0)
    411 """
    412 Ensure we have either None, a dtype object, or a dictionary mapping to
    413 dtype objects.
    414 """
    415 if isinstance(dtype, dict):
--> 416     return {k: pandas_dtype(dtype[k]) for k in dtype}
    417 elif dtype is not None:
    418     return pandas_dtype(dtype)

File C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\dtypes\common.py:1777, in pandas_dtype(dtype)
   1774 # try a numpy dtype
   1775 # raise a consistent TypeError if failed
   1776 try:
-> 1777     npdtype = np.dtype(dtype)
   1778 except SyntaxError as err:
   1779     # np.dtype uses `eval` which can raise SyntaxError
   1780     raise TypeError(f"data type '{dtype}' not understood") from err

TypeError: data type 'datetime' not understood

​

I tried changing the data types several times, but I still get the error.

ChrisGPT was on strike
  • 127,765
  • 105
  • 273
  • 257

2 Answers

0

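Try adding parse_dates=['DATE'] to your pd.read_csv call, as shown below, and avoid dtype=d_type (or at least remove the "DATE": "datetime" entry from it, since 'datetime' is not a valid dtype string).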

pd.read_csv(r'path', parse_dates=['DATE'])

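Or you can add converters={'DATE': lambda t: pd.to_datetime(t)} to your pd.read_csv call. With this you can keep dtype=d_type for the other columns, but the "DATE": "datetime" entry must still be removed from d_type, because pandas validates every dtype in the dictionary before reading.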

R. Baraiya
  • 1,490
  • 1
  • 4
  • 17
0

Avoid using the 'datetime' type in the dtype argument of pd.read_csv(); it is not a dtype string that pandas understands.

Instead, after you read the file, convert the column like this:

product['DATE'] = pd.to_datetime(product['DATE'])