I'm writing a json to gcs using apache beam. But encountered the following error
NotImplementedError: offset: 0, whence: 0, position: 50547, last: 50547 [while running 'Writing new data to gcs/write data gcs/Write/WriteImpl/WriteBundles/WriteBundles']
Don't know why this error is occurring. Code for the same is as follows:
class WriteDataGCS(beam.PTransform):
"""
To write data to GCS
"""
def __init__(self, bucket):
"""
Initiate the bucket as a class field
:type bucket:string
:param bucket: query to be run for data
"""
self.bucket = bucket
def expand(self, pcoll):
"""
PTransform Method run when called on Class Name
:type pcoll: PCollection
:param pcoll: A pcollection
"""
(pcoll | "print intermediate" >> beam.Map(print_row))
return (pcoll | "write data gcs" >> beam.io.WriteToText(self.bucket, coder=JsonCoder(), file_name_suffix=".json"))
class JsonCoder:
"""
This class represents dump and load operations performed on json
"""
def encode(self,data):
"""
Encodes the json data.
:type data: string
:param data: Data to be encoded
"""
# logger.info("JSON DATA for encoding - {}".format(data))
return json.dumps(data,default=str)
def decode(self,data):
"""
Decodes the json data.
:type data: string
:param data: Data to be decoded
"""
# logger.info("JSON DATA for decoding - {}".format(data))
return json.loads(data)