I am trying to map/translate the MediaPipe Pose landmarks (33 values) to a BVH hierarchy, but my model keeps getting flipped and bent for some reason. The challenges I came across:
- For the motion, how do I obtain the Z, X, Y rotations from the MediaPipe landmark coordinates?
- For the hip (root), how do I update the position coordinates (X, Y, Z), and for the rest of the joints, how do I update the rotation values (Z, X, Y) with the incoming frames? (A rough sketch of what I have in mind for the root is below.)
If anyone can help answer any of these, or point me towards the right resource, that would be amazing. Thanks in advance!
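For the root position, the update I have in mind is: take the midpoint of the two hip landmarks, recentre it against the first frame, and scale it into the skeleton's units. A minimal sketch of that idea (the SCALE factor and the Y flip are assumptions I still need to calibrate, not values that come from MediaPipe):

import numpy as np

SCALE = 100.0  # assumed conversion from normalized landmark units to BVH units

def hip_position(landmarks, rest_mid=None):
    # Midpoint of the left (23) and right (24) hip landmarks.
    mid = (np.asarray(landmarks[23]) + np.asarray(landmarks[24])) / 2.0
    if rest_mid is None:
        rest_mid = mid.copy()  # the first frame defines the origin
    d = mid - rest_mid
    # Image y grows downwards, so flip it for a Y-up skeleton (assumption).
    return [d[0] * SCALE, -d[1] * SCALE, d[2] * SCALE], rest_mid

The three returned values would go into the root's Xposition, Yposition and Zposition channels each frame.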
The hierarchy of the BVH file is:
HIERARCHY
ROOT hip
{
OFFSET 0 0 0
CHANNELS 6 Xposition Yposition Zposition Zrotation Yrotation Xrotation
JOINT abdomen
{
OFFSET 0 20.6881 -0.73152
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT chest
{
OFFSET 0 11.7043 -0.48768
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT neck
{
OFFSET 0 22.1894 -2.19456
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT head
{
OFFSET -0.24384 7.07133 1.2192
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT leftEye
{
OFFSET 4.14528 8.04674 8.04672
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET 1 0 0
}
}
JOINT rightEye
{
OFFSET -3.6576 8.04674 8.04672
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET 1 0 0
}
}
}
}
JOINT rCollar
{
OFFSET -2.68224 19.2634 -4.8768
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rShldr
{
OFFSET -8.77824 -1.95073 1.46304
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rForeArm
{
OFFSET -28.1742 -1.7115 0.48768
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rHand
{
OFFSET -22.5879 0.773209 7.07136
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rThumb1
{
OFFSET -1.2192 -0.487915 3.41376
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rThumb2
{
OFFSET -3.37035 -0.52449 3.41376
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET -1.78271 -1.18214 1.43049
}
}
}
JOINT rIndex1
{
OFFSET -7.75947 0.938293 5.60832
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rIndex2
{
OFFSET -2.54057 -0.884171 1.56538
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET -1.62519 -0.234802 1.16502
}
}
}
JOINT rMid1
{
OFFSET -8.24714 1.18213 3.41376
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rMid2
{
OFFSET -3.10165 -0.590103 1.0647
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET -2.48547 -0.328903 0.83742
}
}
}
JOINT rRing1
{
OFFSET -8.82822 0.546677 1.51678
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rRing2
{
OFFSET -2.60934 -0.819778 -0.0198488
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET -2.33842 -0.294052 0.168128
}
}
}
JOINT rPinky1
{
OFFSET -8.27202 -0.0477905 -0.4584
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rPinky2
{
OFFSET -1.82734 -0.647385 -0.700984
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET -1.69225 -0.51767 -0.607171
}
}
}
}
}
}
}
JOINT lCollar
{
OFFSET 2.68224 19.2634 -4.8768
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lShldr
{
OFFSET 8.77824 -1.95073 1.46304
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lForeArm
{
OFFSET 28.1742 -1.7115 0.48768
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lHand
{
OFFSET 22.5879 0.773209 7.07136
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lThumb1
{
OFFSET 1.2192 -0.487915 3.41376
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lThumb2
{
OFFSET 3.37035 -0.52449 3.41376
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET 1.78271 -1.18214 1.43049
}
}
}
JOINT lIndex1
{
OFFSET 7.75947 0.938293 5.60832
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lIndex2
{
OFFSET 2.54057 -0.884171 1.56538
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET 1.62519 -0.234802 1.16502
}
}
}
JOINT lMid1
{
OFFSET 8.24714 1.18213 3.41376
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lMid2
{
OFFSET 3.10165 -0.590103 1.0647
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET 2.48547 -0.328903 0.83742
}
}
}
JOINT lRing1
{
OFFSET 8.82822 0.546677 1.51678
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lRing2
{
OFFSET 2.60934 -0.819778 -0.0198488
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET 2.33842 -0.294052 0.168128
}
}
}
JOINT lPinky1
{
OFFSET 8.27202 -0.0477905 -0.4584
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lPinky2
{
OFFSET 1.82734 -0.647385 -0.700984
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET 1.69225 -0.51767 -0.607171
}
}
}
}
}
}
}
}
}
JOINT rButtock
{
OFFSET -8.77824 4.35084 1.2192
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rThigh
{
OFFSET 0 -1.70687 -2.19456
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rShin
{
OFFSET 0 -36.8199 0.73152
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT rFoot
{
OFFSET 0.73152 -45.1104 -5.12064
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET -1.1221 -3.69964 12.103
}
}
}
}
}
JOINT lButtock
{
OFFSET 8.77824 4.35084 1.2192
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lThigh
{
OFFSET 0 -1.70687 -2.19456
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lShin
{
OFFSET 0 -36.8199 0.73152
CHANNELS 3 Zrotation Xrotation Yrotation
JOINT lFoot
{
OFFSET -0.73152 -45.1104 -5.12064
CHANNELS 3 Zrotation Xrotation Yrotation
End Site
{
OFFSET 1.1221 -3.69964 12.103
}
}
}
}
}
}
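For reference, the MOTION block that has to follow this hierarchy is just a frame count, a frame time, and then one line of 132 space-separated values per frame: 6 channels for the hip root plus 42 joints × 3 rotations, in exactly the order the joints are declared above. Schematically (placeholder numbers, not real data):

MOTION
Frames: 2
Frame Time: 0.0333333
0.0 0.0 0.0 0.0 0.0 0.0 ... (126 more values for frame 1)
0.0 0.0 0.0 0.0 0.0 0.0 ... (126 more values for frame 2)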
The function I have used for rotation:
import numpy as np

def rot(a, b):
    # Unit direction vector pointing from joint position a to joint position b.
    c = b - a
    c /= np.linalg.norm(c)
    # My attempt at the three Euler angles from that single direction vector.
    thetaZ = np.arctan2(c[1], c[0])
    thetaX = np.arctan2(c[2], c[0]**2 + c[1]**2)
    thetaY = np.arctan2(c[2], np.sqrt(c[0]**2 + c[1]**2))
    # Convert to degrees and truncate to two decimal places.
    thetaZdeg = float('{:.3f}'.format(np.rad2deg(thetaZ))[:-1])
    thetaXdeg = float('{:.3f}'.format(np.rad2deg(thetaX))[:-1])
    thetaYdeg = float('{:.3f}'.format(np.rad2deg(thetaY))[:-1])
    return [thetaZdeg, thetaXdeg, thetaYdeg]
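For comparison, this is the direction I have been thinking of going instead: treat each bone's rest direction as the child joint's OFFSET from the hierarchy, find the rotation that carries it onto the current landmark-to-landmark direction, and decompose that rotation in the Z, X, Y order the BVH channels expect. A minimal sketch using SciPy (per-bone only: it ignores the parent joint's accumulated rotation and any twist around the bone axis, and it assumes the landmark axes already match the skeleton's):

import numpy as np
from scipy.spatial.transform import Rotation

def bone_zxy_degrees(rest_dir, parent_pt, child_pt):
    # Rest-pose bone direction (a child OFFSET from the hierarchy), normalised.
    rest = np.asarray(rest_dir, dtype=float)
    rest /= np.linalg.norm(rest)
    # Observed bone direction between the two landmarks, normalised.
    cur = np.asarray(child_pt, dtype=float) - np.asarray(parent_pt, dtype=float)
    cur /= np.linalg.norm(cur)
    # Rotation that best maps the rest direction onto the observed one.
    r, _ = Rotation.align_vectors([cur], [rest])
    # Intrinsic Z-X-Y angles, matching "Zrotation Xrotation Yrotation".
    z, x, y = r.as_euler('ZXY', degrees=True)
    return [z, x, y]

For the rForeArm joint, for example, rest_dir would be rHand's OFFSET (-22.5879, 0.773209, 7.07136) and the two landmarks would be the right elbow (14) and right wrist (16).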
The mapping that I have done from landmark to skeleton:
def dataLoad2(i, j, res1, res2):
    # Combine landmarks i and j (intended as a midpoint) and emit them as "z y x ".
    return str((res1[i] + res2[j] / 2)[2]) + " " + str((res1[i] + res2[j] / 2)[1]) + " " + str((res1[i] + res2[j] / 2)[0]) + " "

def dataLoad(i, res):
    # Emit landmark i as "z y x "; fall back to treating res itself as a single point.
    try:
        return str(res[i][2]) + " " + str(res[i][1]) + " " + str(res[i][0]) + " "
    except:
        return str(res[2]) + " " + str(res[1]) + " " + str(res[0]) + " "

def writeBVH(res):
    # res: 33 MediaPipe landmarks as (x, y, z); indices follow mp_pose.PoseLandmark.
    hip = dataLoad2(24, 23, res, res)        # right hip / left hip
    chest = dataLoad2(11, 12, res, res)      # left shoulder / right shoulder
    temp = list(np.array([eval(i) for i in hip.split(' ')[:-1]]) + np.array([eval(i) for i in chest.split(' ')[:-1]]) / 2)
    abdomen = dataLoad(0, temp)
    mouth = dataLoad2(10, 9, res, res)       # mouth right / mouth left
    temp = list(np.array([eval(i) for i in mouth.split(' ')[:-1]]) + np.array([eval(i) for i in chest.split(' ')[:-1]]) / 2)
    neck = dataLoad(0, temp)
    head = dataLoad(0, res)                  # nose
    lefteye = dataLoad(2, res)
    righteye = dataLoad(5, res)
    rcollar = dataLoad2(12, 10, res, res)
    rshldr = dataLoad(12, res)
    rforearm = dataLoad(14, res)             # right elbow
    rhand = dataLoad(16, res)                # right wrist
    rthumb1 = dataLoad2(16, 22, res, res)
    rthumb2 = dataLoad(22, res)
    rindex1 = dataLoad2(16, 20, res, res)
    rindex2 = dataLoad(20, res)
    # Edit these after using the hand landmarks.
    rmid1 = dataLoad2(16, 20, res, res)
    rmid2 = dataLoad(20, res)
    rring1 = dataLoad2(16, 20, res, res)
    rring2 = dataLoad(20, res)
    # Edit these after using the hand landmarks.
    rpinky1 = dataLoad2(16, 18, res, res)
    rpinky2 = dataLoad(18, res)
    lcollar = dataLoad2(11, 9, res, res)
    lshldr = dataLoad(11, res)
    lforearm = dataLoad(13, res)             # left elbow
    lhand = dataLoad(15, res)                # left wrist
    lthumb1 = dataLoad2(15, 21, res, res)
    lthumb2 = dataLoad(21, res)
    lindex1 = dataLoad2(15, 19, res, res)
    lindex2 = dataLoad(19, res)
    # Edit these after using the hand landmarks.
    lmid1 = dataLoad2(15, 19, res, res)
    lmid2 = dataLoad(19, res)
    lring1 = dataLoad2(15, 19, res, res)
    lring2 = dataLoad(19, res)
    # Edit these after using the hand landmarks.
    lpinky1 = dataLoad2(15, 17, res, res)
    lpinky2 = dataLoad(17, res)
    rbuttock = dataLoad(24, res)
    rthigh = dataLoad2(24, 26, res, res)     # right hip / right knee
    rshin = dataLoad(26, res)
    rfoot = dataLoad(28, res)                # right ankle
    lbuttock = dataLoad(23, res)
    lthigh = dataLoad2(23, 25, res, res)     # left hip / left knee
    lshin = dataLoad(25, res)
    lfoot = dataLoad(27, res)                # left ankle
    return hip+hip+abdomen+chest+neck+head+lefteye+righteye+rcollar+rshldr+rforearm+rhand+rthumb1+rthumb2+rindex1+rindex2+rmid1+rmid2+rring1+rring2+rpinky1+rpinky2+lcollar+lshldr+lforearm+lhand+lthumb1+lthumb2+lindex1+lindex2+lmid1+lmid2+lring1+lring2+lpinky1+lpinky2+rbuttock+rthigh+rshin+rfoot+lbuttock+lthigh+lshin+lfoot+"\n"
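For anyone checking the hard-coded indices above, this is how I look up MediaPipe's own index-to-name table (11/12 are the shoulders, 23/24 the hips, and so on):

import mediapipe as mp

mp_pose = mp.solutions.pose
# Print the 33 pose landmark indices and names used in the mapping above.
for lm in mp_pose.PoseLandmark:
    print(lm.value, lm.name)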
The main loop for real-time capture:
import cv2
import mediapipe as mp
import numpy as np

mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

# For static images:
IMAGE_FILES = []
BG_COLOR = (192, 192, 192)  # gray
with mp_pose.Pose(
        static_image_mode=True,
        model_complexity=2,
        enable_segmentation=True,
        min_detection_confidence=0.5) as pose:
    for idx, file in enumerate(IMAGE_FILES):
        image = cv2.imread(file)
        image_height, image_width, _ = image.shape
        # Convert the BGR image to RGB before processing.
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if not results.pose_landmarks:
            continue
        print(
            f'Nose coordinates: ('
            f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].x * image_width}, '
            f'{results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].y * image_height})'
        )
        annotated_image = image.copy()
        # Draw segmentation on the image.
        # To improve segmentation around boundaries, consider applying a joint
        # bilateral filter to "results.segmentation_mask" with "image".
        condition = np.stack((results.segmentation_mask,) * 3, axis=-1) > 0.1
        bg_image = np.zeros(image.shape, dtype=np.uint8)
        bg_image[:] = BG_COLOR
        annotated_image = np.where(condition, annotated_image, bg_image)
        # Draw pose landmarks on the image.
        mp_drawing.draw_landmarks(
            annotated_image,
            results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
        cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)
        # Plot pose world landmarks.
        mp_drawing.plot_landmarks(
            results.pose_world_landmarks, mp_pose.POSE_CONNECTIONS)

temp = [0.00] * 132   # 44 reference triples (initialised to zero) compared against each frame
count = 0             # number of frames written so far
with open("test.bvh", "a") as bvh:
    # For webcam input:
    cap = cv2.VideoCapture(0)
    with mp_pose.Pose(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue
            # To improve performance, optionally mark the image as not writeable to
            # pass by reference.
            image.flags.writeable = False
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = pose.process(image)
            if results.pose_landmarks:
                count += 1
                # 33 landmarks as (x, y, z).
                res = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark])
                # writeBVH emits 44 "z y x" triples: the hip twice, then the 42 joints.
                t = writeBVH(res)
                t = t.split(' ')[:-1]
                # First output triple: the hip position, truncated to two decimals.
                res = [[float('{:.3f}'.format(float(t[0]))[:-1]),
                        float('{:.3f}'.format(float(t[1]))[:-1]),
                        float('{:.3f}'.format(float(t[2]))[:-1])]]
                t = np.array(list(map(float, t))).reshape(44, 3)
                temp = np.array(list(map(float, temp))).reshape(44, 3)
                # Remaining 43 triples: rotations computed between t and temp.
                for i in range(1, 44):
                    res.append(rot(t[i], temp[i]))
                res = list(np.array(res).flatten())
                # Swap the hip's X and Y rotation values.
                (res[4], res[5]) = (res[5], res[4])
                s = ''
                for i in res:
                    s += str(i) + ' '
                s += '\n'
                bvh.write(s)
                # Flatten temp back for the next iteration.
                temp = list(temp.flatten())
            # Draw the pose annotation on the image.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            mp_drawing.draw_landmarks(
                image,
                results.pose_landmarks,
                mp_pose.POSE_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
            # Flip the image horizontally for a selfie-view display.
            cv2.imshow('MediaPipe Pose', cv2.flip(image, 1))
            if cv2.waitKey(5) & 0xFF == 27:
                break
    cap.release()
cv2.destroyAllWindows()
print(count)
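Since the loop above only appends frame lines to test.bvh, I stitch the final file together afterwards: hierarchy first, then the MOTION header with the final frame count, then the recorded lines. A minimal sketch of that last step (hierarchy.txt and out.bvh are just my working file names, and the 30 fps frame time is an assumption):

# Assemble the final BVH after capture: hierarchy, MOTION header, recorded frames.
with open("hierarchy.txt") as h, open("test.bvh") as frames, open("out.bvh", "w") as out:
    out.write(h.read())
    out.write("MOTION\n")
    out.write("Frames: " + str(count) + "\n")
    out.write("Frame Time: 0.0333333\n")  # assumed ~30 fps capture
    out.write(frames.read())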