I am referring to this tutorial for kafka which I have changed the video frames to image inputs. In my case, consumer.poll() does not return anything and breaks out of the code. In the docker terminal displays this:
kafka1 | [2023-01-10 07:21:30,719] INFO [GroupCoordinator 1]: Dynamic member with unknown member id joins group kafka-cam in Empty state. Created a new member id rdkafka-f0ea3fd0-2eef-494d-bbe5-1acd9db067c4 and request the member to rejoin with this id. (kafka.coordinator.group.GroupCoordinator)
kafka1 | [2023-01-10 07:21:30,721] INFO [GroupCoordinator 1]: Preparing to rebalance group kafka-cam in state PreparingRebalance with old generation 0 (__consumer_offsets-24) (reason: Adding new member rdkafka-f0ea3fd0-2eef-494d-bbe5-1acd9db067c4 with group instance id None; client reason: not provided) (kafka.coordinator.group.GroupCoordinator)
kafka1 | [2023-01-10 07:21:33,722] INFO [GroupCoordinator 1]: Stabilized group kafka-cam generation 1 (__consumer_offsets-24) with 1 members (kafka.coordinator.group.GroupCoordinator)(kafka.coordinator.group.GroupCoordinator)
But it does not actually run the code below:
while True:
print("entered")
msg = consumer.poll(100)
print(msg)
if msg == None:
continue
as consumer.poll exits the entire code as a whole and does not run anything after that.
According to the printing statements I have added for testing purposes, msg should either return None or the value of whatever the consumer polls. However, it breaks out of the code.
python consumer.py
Creating consumer thread...
Starting the consumer thread...
entered
As shown above, it prints entered but does not run the print(msg) after calling poll.
The poll function should only return None or msg, otherwise throw an exception. It should not break out of the code. Anyone experiencing this issue? I might be doing something wrong as this is the first time I am using kafka.
My Producer file:
from producer_config import config as producer_config
from confluent_kafka import Producer
import os
import concurrent.futures
import cv2
from utils import *
from glob import glob
class ProducerThread:
def __init__(self, config):
self.producer = Producer(config)
def publishImage(self, img_path):
img = cv2.imread(img_path)
print(img.shape)
img_name = os.path.basename(img_path).split(".")[0]
img_bytes = serializeImg(img)
self.producer.produce(
topic = "multi-cam-stream",
value = img_bytes,
on_delivery = delivery_report,
timestamp = 0,
headers = {
"img_name": str.encode(img_name)
}
)
self.producer.poll(0.5)
return
def start(self, img_paths):
with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(self.publishImage, img_paths)
self.producer.flush() # push all the remaining msgs in the queue
print("Finished")
if __name__ == "__main__":
img_dir = "imgs/"
img_paths = glob(img_dir + "*.JPG")
producer_thread = ProducerThread(producer_config)
producer_thread.start(img_paths)
Producer Config:
config = {
'bootstrap.servers': '127.0.0.1:9092',
'group.id': 'kafka-cam',
'enable.auto.commit': False,
'default.topic.config': {'auto.offset.reset': 'earliest'},
}
My Consumer file:
import threading
from confluent_kafka import Consumer, KafkaError, KafkaException
from consumer_config import config as consumer_config
from utils import *
from pymongo import MongoClient
from dotenv import load_dotenv
import os
import base64
import requests
import json
class ConsumerThread:
def __init__(self, config, topic, batch_size, model_api, db):
self.config = config
self.topic = topic
self.batch_size = batch_size
self.model_api = model_api
self.db = db
def read_data(self):
consumer = Consumer(self.config)
consumer.subscribe(self.topic)
self.run(consumer,0)
def run(self, consumer, msg_count):
try:
coll = db.outputs
imgs = []
# msg = consumer.poll(2)
# print(msg)
while True:
print("entered")
msg = consumer.poll(timeout=3)
# why did it skip after poll?
print(msg)
if msg == None:
print("msg is empty")
continue
elif msg.error() is not None:
raise KafkaException(msg.error())
elif msg.error() == None:
# msg.value() returns bytes
img_bytes = msg.value()
# img_bytes = base64.b64decode(img_bytes)
imgs.append(img_bytes)
print(len(imgs))
msg_count += 1
if msg_count % self.batch_size == 0:
print(len(imgs))
files = {
f"{_id}": img
for _id, img in enumerate(imgs)
}
# feed it to the model api
response = requests.post(MODEL_API, files=files)
results = json.loads(response.content)
for _id in files:
img_str = base64.b64encode(files[_id])
img = {
"image": img_str,
"result": results[int(_id)]
}
coll.insert_one(img)
consumer.commit(asynchronous=False)
# msg_count = 0
elif msg.error().code() == KafkaError._PARTITION_EOF:
print('End of partition reached {0}/{1}'
.format(msg.topic(), msg.partition()))
else:
print('Error occured: {0}'.format(msg.error().str()))
print(msg_count)
except KeyboardInterrupt:
print("Detected Keyboard Interrupt. Quitting...")
pass
finally:
print("closing")
consumer.close()
def start(self, numThreads):
for _ in range(numThreads):
t = threading.Thread(target=self.read_data)
t.daemon = True
t.start()
if __name__ == "__main__":
load_dotenv()
topic = ["multi-cam-stream"]
MODEL_API = os.environ["MODEL_API"]
MONGODB_URI = os.environ["MONGODB_URI"]
client = MongoClient(MONGODB_URI)
db = client.smart_agriculture
print("Creating consumer thread...")
consumer_thread = ConsumerThread(consumer_config, topic, 4, MODEL_API, db)
print("Starting the consumer thread...")
consumer_thread.start(1)
Consumer Config:
config = {
'bootstrap.servers': '127.0.0.1:9092',
'group.id': 'kafka-cam',
'enable.auto.commit': False,
'default.topic.config': {'auto.offset.reset': 'earliest'},
'max.poll.interval.ms': 20000,
'session.timeout.ms': 10000,
'fetch.message.max.bytes': 10000000,
'max.partition.fetch.bytes': 1000000
}
Utils.py:
import logging
import cv2
import json
logging.basicConfig(level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s')
def delivery_report(err, msg):
if err:
logging.error("Failed to deliver message: {0}: {1}"
.format(msg.value(), err.str()))
else:
logging.info(f"msg produced. \n"
f"Topic: {msg.topic()} \n" +
f"Partition: {msg.partition()} \n" +
f"Offset: {msg.offset()} \n" +
f"Timestamp: {msg.timestamp()} \n")
def serializeImg(img):
_, img_buffer_arr = cv2.imencode(".jpg", img)
img_bytes = img_buffer_arr.tobytes()
return img_bytes
def jsonify(img):
return json.dumps(img)