When I execute below code - gets following error ValueError: Table schema does not match schema used to create file.
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
fields = [
('one', pa.int64()),
('two', pa.string(), False),
('three', pa.bool_())
]
schema = pa.schema(fields)
schema = schema.remove_metadata()
df = pd.DataFrame(
{
'one': [2, 2, 2],
'two': ['foo', 'bar', 'baz'],
'three': [True, False, True]
}
)
df['two'] = df['two'].astype(str)
table = pa.Table.from_pandas(df, schema, preserve_index=False).replace_schema_metadata()
writer = pq.ParquetWriter('parquest_user_defined_schema.parquet', schema=schema)
writer.write_table(table)