I am running Apache Beam Python code with the DirectRunner. It fails with an AttributeError raised in a background thread:
AttributeError: '_SDFBoundedSourceRestrictionTracker' object has no attribute 'checkpoint'
Here is the relevant piece of the code:
def run(argv=None):
"""Main entry point; defines and runs the barc records pipeline."""
# NOTE(review): this snippet was pasted without indentation and is cut off
# after the last transform (the parenthesis opened at `fa_data_rows = (p`
# is not closed within the excerpt).
# Parse the flags known to this script; unrecognised arguments are kept in
# pipeline_args and handed to PipelineOptions.
parser = argparse.ArgumentParser()
parser.add_argument('--input',
type=str,
dest='input',
default='gs://{Bucket name}/Week28 - Weekly.xlsb',
help='Input file to process')
args, pipeline_args = parser.parse_known_args(argv)
pipeline_options = PipelineOptions(pipeline_args)
with beam.Pipeline(options=pipeline_options) as p:
# NOTE(review): only --input is registered on the parser shown here, yet
# args.week_num (and args.fa further down) are read. Presumably those
# arguments are defined in code omitted from this excerpt; confirm.
if args.input and args.week_num:
# Read the master table from BigQuery and key each row by its 'name' field.
channel_master = (p | 'ReadMaster' >> beam.io.Read(beam.io.BigQuerySource(
query = "SELECT * FROM DATASET.MASTER_TABLE"
))
| "Map on name" >> beam.Map(lambda elem:(elem['name'],elem)))
# Read the Display_Name column from BigQuery and pass each row through
# PrintValsDoFn (defined outside this excerpt).
gc = (p | 'ReadGC' >> beam.io.Read(beam.io.BigQuerySource(
query = "SELECT Display_Name FROM DEST.TABLE"))
| 'yieldvals' >> beam.ParDo(PrintValsDoFn())
)
# Read args.fa.format(args.week_num) with ReadFromText, run each element
# through ConvertFAToDictFn, keep only rows whose 'FA_CODE?' is "true"
# (case-insensitive) and whose 'GN_ID' is not '-', then key each remaining
# row by str(row['NAME']).
fa_data_rows = (p
| 'ReadFaData' >> ReadFromText(args.fa.format(args.week_num))
| 'ConvertFaToDict' >> beam.ParDo(ConvertFAToDictFn(
gracenoteEvent.GracenoteEventType('fa_input').get_dict_keys()
))
| 'FilterWritableRows' >> beam.Filter(lambda row: str(row['FA_CODE?']).lower() == "true"
and row['GN_ID'] != '-')
| "Map master on channel" >> beam.Map(
lambda x: (str(str(x['NAME'])), x)))
After that, the results are written to BigQuery.
Traceback:
Exception in thread Thread-2:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/threading.py", line 1254, in run
self.function(*self.args, **self.kwargs)
File "/Users/kshitijbhadage/gracenote/lib/python3.8/site-packages/apache_beam/runners/direct/sdf_direct_runner.py", line 467, in initiate_checkpoint
checkpoint_state.residual_restriction = tracker.checkpoint()
AttributeError: '_SDFBoundedSourceRestrictionTracker' object has no attribute 'checkpoint'
Exception in thread Thread-3:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/threading.py", line 1254, in run
self.function(*self.args, **self.kwargs)
File "/Users/kshitijbhadage/gracenote/lib/python3.8/site-packages/apache_beam/runners/direct/sdf_direct_runner.py", line 467, in initiate_checkpoint
checkpoint_state.residual_restriction = tracker.checkpoint()
AttributeError: '_SDFBoundedSourceRestrictionTracker' object has no attribute 'checkpoint'
I am not sure why this error occurs. I tried debugging the pipeline line by line, but the issue persists.