I have the code below, which queries a Postgres table of about 500k rows. The process gets SIGKILLed when it hits rows = cur.fetchall(). I've also tried iterating through the cursor rather than loading everything into rows, but that still runs into OOM issues (there's a sketch of that attempt after the code below).

How can I pull all of the data out of a table and safely convert it into a Parquet file, regardless of the table's size?
import io

import pyarrow.parquet as pq
from psycopg2 import extras


def get_parquet_for_dataset_id(self, dataset, lob, max_dt):
    query = _table_query(lob, dataset, max_dt)
    conn = self.conns[lob]
    with conn:
        with conn.cursor(cursor_factory=extras.RealDictCursor) as cur:
            cur.execute(query)
            rows = cur.fetchall()  # <-- SIGKILL happens here
    table = rows_to_table(rows)
    pq_bytes = io.BytesIO()
    pq.write_table(table, pq_bytes)
    pq_bytes.seek(0)
    return pq_bytes
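
For reference, the iteration attempt looked roughly like this (reconstructed from memory, so the details are approximate). As far as I understand, psycopg2's default client-side cursor already transfers the entire result set to the client when execute() runs, which would explain why iterating row by row didn't lower peak memory:

with conn:
    with conn.cursor(cursor_factory=extras.RealDictCursor) as cur:
        cur.execute(query)   # full result set is already buffered client-side here
        rows = []
        for row in cur:      # iterating doesn't help: memory was spent at execute()
            rows.append(row)
table = rows_to_table(rows)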
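In case it clarifies what I'm after, the direction I've been trying to get working is a server-side (named) psycopg2 cursor combined with pyarrow's ParquetWriter, writing one batch at a time instead of one giant table. This is only a sketch: stream_table_to_parquet, the cursor name, and batch_size are placeholders of mine, and it assumes the first batch is representative enough for pyarrow's schema inference:

import io

import pyarrow as pa
import pyarrow.parquet as pq
from psycopg2 import extras


def stream_table_to_parquet(conn, query, batch_size=10_000):
    # A named cursor is server-side: rows are fetched from Postgres in
    # chunks instead of being materialized in client memory all at once.
    buf = io.BytesIO()
    writer = None
    with conn:
        with conn.cursor(name="stream_cur",
                         cursor_factory=extras.RealDictCursor) as cur:
            cur.execute(query)
            while True:
                rows = cur.fetchmany(batch_size)  # only batch_size rows held at a time
                if not rows:
                    break
                batch = pa.Table.from_pylist(rows)
                if writer is None:
                    # Lock in the schema inferred from the first batch; later
                    # batches are cast to it. This breaks if the first batch is
                    # unrepresentative (e.g. a column that happens to be all NULLs).
                    writer = pq.ParquetWriter(buf, batch.schema)
                writer.write_table(batch.cast(writer.schema))
    if writer is not None:
        writer.close()
    buf.seek(0)
    return buf

Even if that works, the BytesIO still holds the whole compressed Parquet file in memory, so I suspect that for truly arbitrary table sizes the target would need to be a real file (or an object-store upload) rather than an in-memory buffer. Is this the right approach, or is there a better pattern?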