Below is a code snippet that uploads data from a delta table (8,000 rows in total) to Event Hub. When I count the rows received on the Service Bus side, only 7,999 records arrive, so one record is being dropped somewhere. Any suggestions?
import json

from azure.eventhub import EventData

# Read the delta table and pull every row back to the driver
df = spark.read.format('delta').load("dbfs:/user/hive/warehouse/x")
data_list = df.select("*").collect()
if not data_list:
    self.logger.error("No data available")
    raise Exception('Problem to send data')

# Convert each Row into a dict of string values
list_of_dicts = []
for i in data_list:
    sub_dicts = {k: ('%s' % v) for k, v in (i.asDict().items() if i else {})}
    list_of_dicts.append(sub_dicts)

total_messages = len(list_of_dicts)
batch = self.event_hub_client.create_batch(partition_id='0')
for i in range(total_messages):
    indv_msg = list_of_dicts[i]
    user_encode_data = json.dumps(indv_msg, indent=2, ensure_ascii=False)
    user_encode_data = str(user_encode_data)
    try:
        batch.add(EventData(user_encode_data))
    except ValueError:
        # ship events in the full batch
        self.event_hub_client.send_batch(batch)
        # create a new batch
        batch = self.event_hub_client.create_batch(partition_id='0')
        i = i - 1
    except Exception as e:
        self.logger.error("Error shipping event to EventHub: {}".format(e))
    if i == total_messages - 1:
        # last message: ship the remaining batch and close the client
        self.event_hub_client.send_batch(batch)
        self.event_hub_client.close()
        self.logger.info("closing the client")
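
For reference, this is the flow I was aiming for, written as a minimal standalone sketch against the azure-eventhub v5 API (EventHubProducerClient / EventDataBatch). The function name send_all and the explicit producer argument are just for illustration; list_of_dicts corresponds to the list built in my code above, and I am assuming every single event fits into an empty batch.

import json

from azure.eventhub import EventData, EventHubProducerClient


def send_all(producer: EventHubProducerClient, list_of_dicts):
    batch = producer.create_batch(partition_id='0')
    for record in list_of_dicts:
        event = EventData(json.dumps(record, ensure_ascii=False))
        try:
            batch.add(event)
        except ValueError:
            # batch is full: ship it, start a new one, and re-add the
            # event that did not fit so it is not lost
            producer.send_batch(batch)
            batch = producer.create_batch(partition_id='0')
            batch.add(event)  # assumes a single event always fits
    # ship whatever is left in the final (possibly partial) batch
    if len(batch) > 0:
        producer.send_batch(batch)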