
Below is the code snippet that uploads data from a Delta table (8,000 rows in total) to Event Hub. When I count the total uploaded rows on the Service Bus side, I find that only 7,999 records are uploaded. Any suggestions?

import json

from azure.eventhub import EventData

# Read the Delta table and collect all rows to the driver
df = spark.read.format('delta').load("dbfs:/user/hive/warehouse/x")
data_list = df.select("*").collect()

if not data_list:
    self.logger.error("No data available")
    raise Exception('No data available to send')

# Convert every Row into a dict of string values
list_of_dicts = []
for row in data_list:
    sub_dict = {k: '%s' % v for k, v in (row.asDict().items() if row else {})}
    list_of_dicts.append(sub_dict)

total_messages = len(list_of_dicts)

# Build batches and ship them to Event Hub, starting a new batch
# whenever the current one is full
batch = self.event_hub_client.create_batch(partition_id='0')
for i in range(total_messages):
    indv_msg = list_of_dicts[i]
    user_encode_data = json.dumps(indv_msg, indent=2, ensure_ascii=False)
    try:
        batch.add(EventData(user_encode_data))
    except ValueError:
        # batch is full: ship the current batch
        self.event_hub_client.send_batch(batch)
        # start a new batch
        batch = self.event_hub_client.create_batch(partition_id='0')
        i = i - 1
    except Exception as e:
        self.logger.error("Error shipping event to EventHub: {}".format(e))

    # after the last message, ship whatever is left in the batch
    if i == total_messages - 1:
        self.event_hub_client.send_batch(batch)

self.event_hub_client.close()
self.logger.info("closing the client")
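
For context, here is a minimal sketch of one way the received events could be counted on the consumer side. This is only an illustration, assuming the azure-eventhub v5 EventHubConsumerClient, the $Default consumer group, and placeholder connection details (not the exact Service Bus tooling mentioned above):

from azure.eventhub import EventHubConsumerClient

# Placeholders: replace with the real namespace connection string and hub name
CONN_STR = "<event-hub-namespace-connection-string>"
EVENTHUB_NAME = "<event-hub-name>"

received_count = 0

def on_event(partition_context, event):
    # Count every event delivered, across all partitions
    global received_count
    if event is not None:
        received_count += 1

client = EventHubConsumerClient.from_connection_string(
    CONN_STR,
    consumer_group="$Default",
    eventhub_name=EVENTHUB_NAME,
)

try:
    # starting_position="-1" reads each partition from the beginning;
    # receive() blocks until the client is stopped (e.g. Ctrl+C)
    with client:
        client.receive(on_event=on_event, starting_position="-1")
except KeyboardInterrupt:
    print("Total events received: {}".format(received_count))

Because no partition_id is passed to receive(), the client reads from every partition, so the final count should reflect all events sent regardless of which partition they were published to.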