import confluent_kafka
from multiprocessing import Pool

part_process_list = [
    {"topic": "", "partition": 1, "start_offset": "", "end_at": ""},
    {"topic": "", "partition": 2, "start_offset": "", "end_at": ""},
    {"topic": "", "partition": 3, "start_offset": "", "end_at": ""},
]
# Dispatch one worker per partition (this runs inside a method of the same class)
with Pool(5) as executor:
    try:
        results = list(executor.map(self.consume_msg, part_process_list))
    except Exception:
        raise  # re-raise with the original traceback
def consume_msg(self, part):
    cnsr = confluent_kafka.Consumer(self._src_kafka_config)
    topic = part['topic']
    partition = part['partition']
    start_offset = part['start_offset']
    end_at = part['end_at']
    tp = [confluent_kafka.TopicPartition(topic, partition, start_offset)]
    # Assign the consumer to the given partition, starting at the given offset
    cnsr.assign(tp)
    assignment = cnsr.assignment()
    done_processing = False
    while not done_processing:
        # Fetch up to 10000 messages, waiting at most 1 second.
        # NOTE: done_processing is never set in this loop, and an empty
        # msg_list only means the timeout expired, not end of partition.
        msg_list = cnsr.consume(10000, 1)
        print('msg_list---', msg_list)
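
Note that an empty list from cnsr.consume(10000, 1) only means the 1-second timeout expired, for example while the freshly created consumer is still connecting to the broker and fetching metadata; it does not mean the partition has been read to the end. A minimal sketch of one way to detect the real end of a partition, using librdkafka's enable.partition.eof setting (consume_partition_to_eof and handle are hypothetical names for illustration, not part of my job):

import confluent_kafka

def consume_partition_to_eof(config, topic, partition, start_offset, handle):
    # Ask the broker to emit an explicit end-of-partition event
    conf = dict(config, **{"enable.partition.eof": True})
    cnsr = confluent_kafka.Consumer(conf)
    cnsr.assign([confluent_kafka.TopicPartition(topic, partition, start_offset)])
    done_processing = False
    while not done_processing:
        # An empty batch here is just a timeout; keep polling
        for msg in cnsr.consume(10000, 1):
            if msg.error():
                if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    done_processing = True  # this partition is fully read
                else:
                    raise confluent_kafka.KafkaException(msg.error())
            else:
                handle(msg)  # caller-supplied handler for a valid message
    cnsr.close()

With this flag set, end-of-partition arrives as a message whose error code is KafkaError._PARTITION_EOF, so a short or empty batch can safely be retried instead of being treated as "no more data".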
While extracting the data we get 0 records for one partition. When I rerun the same job I get the records for that partition, but 0 records for a different partition. Sometimes I get all the records for all partitions. Whatever happens, every run of the job needs to fetch all the records from all the partitions.
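
Since each entry in part_process_list already carries a start offset, one way to make every run deterministic is to bound the read with the partition's high watermark instead of inferring completion from a single short-timeout consume() call. A minimal sketch under that assumption (read_partition_to_watermark is a hypothetical helper; config, topic, partition, and start_offset stand in for the real values):

import confluent_kafka

def read_partition_to_watermark(config, topic, partition, start_offset):
    cnsr = confluent_kafka.Consumer(config)
    tp = confluent_kafka.TopicPartition(topic, partition, start_offset)
    # low = oldest available offset, high = one past the newest message
    low, high = cnsr.get_watermark_offsets(tp, timeout=10)
    cnsr.assign([tp])
    records = []
    next_offset = start_offset if start_offset >= 0 else low
    while next_offset < high:
        # An empty batch is just a timeout; loop until we reach `high`
        for msg in cnsr.consume(10000, 1):
            if msg.error():
                raise confluent_kafka.KafkaException(msg.error())
            records.append(msg)
            next_offset = msg.offset() + 1
    cnsr.close()
    return records

Because high is captured once at the start, each worker reads a well-defined, repeatable offset range even while producers keep writing, so a run should only finish after every partition has been read up to its watermark.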