I'm getting the following error while running the pipeline on the DirectRunner:

can't pickle generator objects [while running 'Assign Side Columns']

The error pops up when Beam writes the mapped rows to BigQuery. Any idea where it's coming from?
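If I understand the message, something is handing an actual generator object to the pickler. This standalone snippet (no Beam involved, just the stdlib pickle module) reproduces the same class of failure:

import pickle

def gen():
    yield 1

# Calling gen() doesn't run its body; it just creates a generator object.
# Generators carry live frame state, so pickling one raises a TypeError
# (the exact wording varies by Python version and pickler).
pickle.dumps(gen())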
import json

import apache_beam as beam
from apache_beam.io.gcp import bigquery_file_loads
from apache_beam.options.pipeline_options import GoogleCloudOptions, PipelineOptions, SetupOptions

# CommonOptions, renames_columns and the other custom DoFns/helpers are defined elsewhere in the project.

def assign_columns(row, mapping):
    # Find the side-input mapping matching this row and rename its columns.
    main_account = row['filepath_platform_id']
    main_report = row['filepath_report_name']
    for _mapping in mapping:
        _side_account = _mapping['account_id']
        _side_report = _mapping['report_name']
        if main_account == _side_account and main_report == _side_report:
            _cols = json.loads(_mapping['mapping'])
            row = renames_columns(row, _cols)
    yield row
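Note that assign_columns contains a yield, so calling it only creates a generator; the body doesn't run until the generator is iterated. You can confirm that without touching real data:

import types

# The empty dict is never read, because calling the function doesn't
# execute its body; we just get a generator object back.
g = assign_columns({}, [])
print(isinstance(g, types.GeneratorType))  # True

In the pipeline below, this function is applied with beam.Map at the 'Assign Side Columns' step, which is the step named in the error.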
def run(argv=None, save_main_session=True):
    options = PipelineOptions(flags=argv)
    options.view_as(SetupOptions).save_main_session = save_main_session
    table_spec = 'SELECT * FROM `nextwork-staging.datalake.v_mapping_reports`'

    with beam.Pipeline(options=options) as p:
        common_options = options.view_as(CommonOptions)
        file_metadata = (
            p
            | 'Create empty PCollection' >> beam.Create(['Start'])
            | 'Load Input Location' >> beam.ParDo(CreateFilesFromGlobByDate(
                common_options.input, None, None, None,
                common_options.starting_date, common_options.ending_date,
                common_options.only_last_file_from_input))
        )
        rows = (
            file_metadata
            | 'GetIncomeAccessFn' >> beam.ParDo(GetIncomeAccessFn())
            | 'Map to regular dictionary' >> beam.Map(lambda x: dict(x))
            | 'TransformDate' >> beam.ParDo(TransformDateFromStringFn())
        )
        # Mapping table, read once and passed to assign_columns as a side input.
        side_input = (
            p
            | 'ReadTable' >> beam.io.ReadFromBigQuery(query=table_spec, use_standard_sql=True)
            | 'Map Columns' >> beam.Map(lambda x: dict(x))
        )
        mapped_rows = (
            rows
            # 'Assign Side Columns' is the step named in the error.
            | 'Assign Side Columns' >> beam.Map(assign_columns, mapping=beam.pvalue.AsIter(side_input))
            | 'Rename column' >> beam.Map(renames_columns, names={'site': 'campaign'})
            | 'TransformForBigQuery' >> beam.ParDo(TransformForBigQueryFn())
        )
        gcp_options = options.view_as(GoogleCloudOptions)
        (
            mapped_rows
            | 'WriteToBigQueryUsingParameter' >> bigquery_file_loads.BigQueryBatchFileLoads(
                destination=_get_table_name,
                schema=_get_schema,
                create_disposition='CREATE_IF_NEEDED',
                write_disposition='WRITE_TRUNCATE',
                additional_bq_parameters=_get_additional_bq_parameters,
                custom_gcs_temp_location=gcp_options.temp_location)
        )
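My current suspicion, for what it's worth: beam.Map emits whatever the callable returns as a single element, so with a generator function the elements of mapped_rows are generator objects, and those can't be pickled when the runner ships them on to the BigQuery write. If that's right, the one-to-many transform beam.FlatMap (which iterates over the returned iterable and emits each item) should be a drop-in replacement; a sketch, assuming nothing else changes:

mapped_rows = (
    rows
    # FlatMap unpacks the generator returned by assign_columns instead of
    # emitting the generator object itself, as Map does.
    | 'Assign Side Columns' >> beam.FlatMap(assign_columns, mapping=beam.pvalue.AsIter(side_input))
    | 'Rename column' >> beam.Map(renames_columns, names={'site': 'campaign'})
    | 'TransformForBigQuery' >> beam.ParDo(TransformForBigQueryFn())
)

Is that the right reading, or is the generator coming from somewhere else?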