I am using the Azure Face API to detect faces in a video stream, but for each detected face Azure returns a unique faceId (which is exactly what the documentation says).
The problem is: let's say Mr. ABC appears in 20 video frames — then 20 unique faceIds get generated. I want Azure Face to return a single faceId, or a group of faceIds, for Mr. ABC specifically, so that I can tell it is the same person staying in front of the camera for x amount of time.
I have read the documentation for Azure Face grouping and Azure FindSimilar, but I didn't understand how to make them work in the case of a live video stream.
The code I am using to detect faces with Azure Face is given below:
from azure.cognitiveservices.vision.face import FaceClient
from msrest.authentication import CognitiveServicesCredentials
from azure.cognitiveservices.vision.face.models import TrainingStatusType, Person, SnapshotObjectType, OperationStatusType
import cv2
import os
import requests
import sys,glob, uuid,re
from PIL import Image, ImageDraw
from urllib.parse import urlparse
from io import BytesIO
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient,__version__
# --- Azure Face API client setup (replace key/endpoint with your own) ---
face_key = 'XABC' #API key
face_endpoint = 'https://XENDPOINT.cognitiveservices.azure.com' #endpoint, e.g. 'https://westus.api.cognitive.microsoft.com'
credentials = CognitiveServicesCredentials(face_key)
face_client = FaceClient(face_endpoint, credentials)
# Default webcam as the live video source.
camera = cv2.VideoCapture(0)
# Frame counter; also used to name the snapshot files saved per frame.
samplenum =1
im = ""
work_dir = os.getcwd()
# Person-group identifiers (for Azure's identify/group workflow; unused below).
person_group_id = 'test02-group'
target_person_group_id = str(uuid.uuid4())
# Accumulates every face_id Azure returns -- one per detected face per frame.
face_ids = []
#cv2 font
font = cv2.FONT_HERSHEY_SIMPLEX
# NOTE(review): despite the names, width/height are later overwritten with the
# two corner POINTS returned by getRectangle, not scalar dimensions.
#empty tuple
width = ()
height = ()
left=0
bottom=0
def getRectangle(faceDictionary):
    """Convert an Azure detection's face_rectangle into two corner points.

    Returns ((left, top), (right, bottom)) -- the opposite corners cv2
    expects when drawing a bounding box with cv2.rectangle.
    """
    box = faceDictionary.face_rectangle
    top_left = (box.left, box.top)
    bottom_right = (box.left + box.width, box.top + box.height)
    return (top_left, bottom_right)
# Main capture loop: grab a frame, save it, send it to Azure Face for
# detection, draw the returned bounding boxes, and collect the faceIds.
while True:
    check, campic = camera.read()
    # Bug fix: the original ignored `check`; a failed grab (camera busy or
    # disconnected) would have crashed on cv2.imwrite with a None frame.
    if not check:
        break
    samplenum = samplenum + 1
    path = work_dir + "/live_pics/" + str(samplenum) + ".jpg"
    cv2.imwrite("live_pics/" + str(samplenum) + ".jpg", campic)
    # Bug fix: the original opened the file with "r+b" and never closed it,
    # leaking one file handle per frame. Read-only + context manager instead.
    with open(path, "rb") as stream:
        detected_faces = face_client.face.detect_with_stream(
            stream,
            return_face_id=True,
            return_face_attributes=['age', 'gender', 'emotion'],
            recognitionModel="recognition_03")
    for face in detected_faces:
        # getRectangle returns the two corner points of the bounding box.
        top_left, bottom_right = getRectangle(face)
        cv2.rectangle(campic, top_left, bottom_right, (0, 0, 170), 2)
        # NOTE(review): each frame yields a brand-new face_id even for the
        # same person; deduplicate with Face "Find Similar" or a PersonGroup.
        face_ids.append(face.face_id)
    # Stop after 10 captured frames.
    if samplenum > 10:
        break
    cv2.imshow("campic", campic)
    if cv2.waitKey(1) == ord("q"):
        break
# Release the camera and close the preview window.
camera.release()
cv2.destroyAllWindows()