I have a script to test at-least-once consumption.

The producer:
import json
import random
import time
from confluent_kafka import Producer
import config
p = Producer({'bootstrap.servers': ','.join(config.KAFKA_HOST)})

total_count = 0
c = 0
try:
    for i in range(20000):
        num = random.randint(1, 1000000)
        total_count += num
        a = {'t': num, 'time': time.time()}
        p.produce('test-topic-vv', json.dumps(a))
        c += 1
        if c % 100 == 0:
            p.flush()
finally:
    p.flush()
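For reference, produce() is asynchronous, so a send is only confirmed once its delivery report is served. Below is a minimal sketch of the same producer with a delivery callback; the on_delivery function is mine and not part of the test script, and it assumes the same config module:

import json
import time
from confluent_kafka import Producer
import config

def on_delivery(err, msg):
    # Called from poll()/flush() once the broker acks (or rejects) the message
    if err is not None:
        print('delivery failed: {}'.format(err))
    else:
        print('delivered to {} [{}] at offset {}'.format(msg.topic(), msg.partition(), msg.offset()))

p = Producer({'bootstrap.servers': ','.join(config.KAFKA_HOST)})
p.produce('test-topic-vv', json.dumps({'t': 1, 'time': time.time()}), on_delivery=on_delivery)
p.poll(0)   # serve queued delivery callbacks
p.flush()   # wait for outstanding messages and their callbacks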
The consumer:
import json
import random
import sys
from confluent_kafka import Consumer, TopicPartition
import config
c = Consumer({
    'bootstrap.servers': ','.join(config.KAFKA_HOST),
    'group.id': 'test-topic-consumer-group',
    'auto.offset.reset': 'earliest',
    'enable.auto.offset.store': False,
    'enable.auto.commit': True,
})
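# With enable.auto.offset.store=False, only offsets explicitly passed to
# store_offsets() are eligible to be committed by the auto-committer
# (enable.auto.commit=True).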
topic = 'test-topic-vv'

def test_for_seek():
    try:
        pp = []
        pp.append(TopicPartition(topic, partition=8))
        c.assign(pp)
        while True:
            msgs = c.consume(num_messages=10, timeout=10)
            if not msgs:
                print('no data and wait')
                for i in c.assignment():
                    print(i.topic, i.partition, i.offset, c.get_watermark_offsets(i))
                continue
            for msg in msgs:
                t1 = msg.partition()
                o1 = msg.offset()
                print('Received message: {} par {} offset {}'.format(msg.value().decode('utf-8'), t1, o1))
            break
    finally:
        c.close()

def test_for_run():
    try:
        c.subscribe([topic])
        total_count = 0
        map_par = {}
        while True:
            msgs = c.consume(num_messages=10, timeout=5)
            if not msgs:
                print('no data and wait')
                for i in c.assignment():
                    print(i.topic, i.partition, i.offset, c.get_watermark_offsets(i))
                continue
            deald = []
            for msg in msgs:
                t1 = msg.partition()
                o1 = msg.offset()
                print('Received message: {} par {} offset {}'.format(msg.value().decode('utf-8'), t1, o1))
                if random.randint(1, 100) == 9:
                    # simulate a processing ("deal") failure so the message will be retried
                    print('deal failed will retry msg offset {} partition {}'.format(msg.offset(), msg.partition()))
                    break
                else:
                    total_count += json.loads(msg.value())['t']
                    # processing succeeded: offsets per partition must advance by exactly 1
                    if t1 in map_par:
                        if map_par[t1] + 1 != o1:
                            raise Exception('deal partition {} except last offset {} current offset {}'.format(t1, map_par[t1], o1))
                    map_par[t1] = o1
                    c.store_offsets(msg)
                    deald.append(msg)
            group_partition = {}
            for msg in msgs:
                if msg in deald:
                    continue
                partition = msg.partition()
                offset = msg.offset()
                if partition in group_partition:
                    group_partition[partition] = min(group_partition[partition], offset)
                else:
                    group_partition[partition] = offset
            # seek each affected partition back to its first unprocessed offset
            for k, v in group_partition.items():
                c.seek(TopicPartition(topic, partition=k, offset=v))
                print('deal failed will set msg offset {} partition {}'.format(v, k))
    finally:
        c.close()

if sys.argv[1] == 'test_for_seek':
    test_for_seek()
else:
    test_for_run()
The topic test-topic-vv has 9 partitions.

First I ran the producer to add some messages to the topic, then consumed them, but I got an exception:
The latest message's offset of partition 8 should be 7382 but got 7391
Then I ran test_for_seek to check the consumer group's actually recorded offset, and it was indeed 7382.

I also checked the broker's consumer-group offset record, and it was 7382 as well.

So what happens to the consumer when seek() is used to manage offsets? I hope someone can help me figure out this problem.
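For reference, this is roughly how the committed offset can also be cross-checked from code (a sketch only; I actually verified it with test_for_seek and the broker's group offset record, and the chk consumer below is just illustrative):

from confluent_kafka import Consumer, TopicPartition
import config

chk = Consumer({
    'bootstrap.servers': ','.join(config.KAFKA_HOST),
    'group.id': 'test-topic-consumer-group',
})
# committed() asks the broker for this group's committed offset of partition 8
committed = chk.committed([TopicPartition('test-topic-vv', 8)], timeout=10)
print(committed[0].offset)   # expected to show 7382, matching the broker's record
chk.close()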
Check information:
- confluent_kafka.version()==1.9.2
- confluent_kafka.libversion()==1.9.2
- Operating system: ubuntu 16.04
- Python 3.8
- Kafka 2.11-1.1.1
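The client versions above were read with:

import confluent_kafka
print(confluent_kafka.version())     # ('1.9.2', ...)
print(confluent_kafka.libversion())  # ('1.9.2', ...)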