I am in the process of upgrading from Python 2 to Python 3 (Anaconda, 32-bit). Upgrading to 64-bit is not an option at the moment.
Upon calling my function, I get a MemoryError:
def M2():
print ('Loading datasets...')
e1 = pd.read_csv(working_dir+"E1.txt",sep=',')
E1.txt is 300,000 KB (roughly 300 MB).
Is there a better way of reading in this data?
Update
- I do not want to use `chunksize`, as this will not read my data into a DataFrame.
- I have reduced my .txt file from 300,000 KB to 50,000 KB and I still get the memory issue.
Traceback:
Traceback (most recent call last):
File "<ipython-input-99-99e71d524b4b>", line 1, in <module>
runfile('C:/AppData/FinRecon/py_code/python3/DataJoin.py', wdir='C:/AppData/FinRecon/py_code/python3')
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/AppData/FinRecon/py_code/python3/DataJoin.py", line 474, in <module>
M2()
File "C:/AppData/FinRecon/py_code/python3/DataJoin.py", line 31, in M2
e1 = pd.read_csv(working_dir+"E1.txt",sep=',')
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\io\parsers.py", line 702, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\io\parsers.py", line 435, in _read
data = parser.read(nrows)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\io\parsers.py", line 1154, in read
df = DataFrame(col_dict, columns=columns, index=index)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\core\frame.py", line 392, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\core\internals\construction.py", line 212, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\core\internals\construction.py", line 61, in arrays_to_mgr
return create_block_manager_from_arrays(arrays, arr_names, axes)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\core\internals\managers.py", line 1666, in create_block_manager_from_arrays
blocks = form_blocks(arrays, names, axes)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\core\internals\managers.py", line 1734, in form_blocks
int_blocks = _multi_blockify(items_dict['IntBlock'])
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\core\internals\managers.py", line 1819, in _multi_blockify
values, placement = _stack_arrays(list(tup_block), dtype)
File "C:\Users\stack\AppData\Local\Continuum\anaconda3\anaconda3_32bit\lib\site-packages\pandas\core\internals\managers.py", line 1861, in _stack_arrays
stacked = np.empty(shape, dtype=dtype)
MemoryError