1

I try to make simple AR program.

I use Aruco(OpenCV) and OpenGL but my program is little wrong.

When I move camera, the cube is move and draw wrong place.

I upload sample image

[Fig1]1

and

[Fig2]2.

I hope the cube is always drawn in center of marker.

My all source is below. I use Python3.

#coding: utf-8

from OpenGL.GL import *
from OpenGL.GLU import *
from OpenGL.GLUT import *
import cv2
import numpy as np
import time


USE_CAMERA = True

# Set AR
aruco = cv2.aruco
dictionary = aruco.getPredefinedDictionary(aruco.DICT_4X4_50)

#load camera parameter
mtx = np.loadtxt("mtx.csv",delimiter=",")
dist = np.loadtxt("dist.csv",delimiter=",")

alpha = mtx[0][0]
beta = mtx[1][1]
cx = mtx[0][2]
cy = mtx[1][2]

if USE_CAMERA:
    # USB camera setup
    cap = cv2.VideoCapture(0)
    if cap.isOpened() is False:
        raise("IO Error")
    cap.set(cv2.CAP_PROP_FPS, 30)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

windowWidth = 640
windowHeight = 480

def draw():
    if USE_CAMERA:
        ret, img = cap.read() #read camera image
    else:
        img = cv2.imread('testimg.jpg') # if use the image file

    # Aruco
    corners, ids, rejectedImgPoints = aruco.detectMarkers(img, dictionary)
    rvec, tvec, _objPoints = aruco.estimatePoseSingleMarkers(corners, 8.0, mtx, dist)
    if not ids is None:
        # draw axis and center circle
        p1 = corners[0][0][2]
        p2 = corners[0][0][1]
        p3 = corners[0][0][0]
        p4 = corners[0][0][3]
        s1 = ((p4[0] - p2[0])*(p1[1] - p2[1]) - (p4[1] - p2[1])*(p1[0] - p2[0])) / 2
        s2 = ((p4[0] - p2[0])*(p2[1] - p3[1]) - (p4[1] - p2[1])*(p2[0] - p3[0])) / 2
        c1x = p1[0] + (p3[0] - p1[0]) * s1 / (s1 + s2)
        c1y = p1[1] + (p3[1] - p1[1]) * s1 / (s1 + s2)
        cv2.circle(img,(int(c1x), int(c1y)),10,(0,0,1))
        aruco.drawAxis(img, mtx, dist, rvec[0], tvec[0], 8.0)
    img= cv2.cvtColor(img,cv2.COLOR_BGR2RGB) #BGR-->RGB
    h, w = img.shape[:2]


    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, w, h, 0, GL_RGB, GL_UNSIGNED_BYTE, img)

    ## Enable / Disable
    glDisable(GL_DEPTH_TEST)    # Disable GL_DEPTH_TEST
    glDisable(GL_LIGHTING)      # Disable Light
    glDisable(GL_LIGHT0)        # Disable Light
    glEnable(GL_TEXTURE_2D)     # Enable texture map

    ## init
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)  # Clear Buffer
    glColor3f(1.0, 1.0, 1.0)    # Set texture Color(RGB: 0.0 ~ 1.0)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR)

    ## draw background
    glMatrixMode(GL_PROJECTION)
    glLoadIdentity()
    glPushMatrix()
    glBegin(GL_QUADS)
    glTexCoord2d(0.0, 1.0)
    glVertex3d(-1.0, -1.0,  0)
    glTexCoord2d(1.0, 1.0)
    glVertex3d( 1.0, -1.0,  0)
    glTexCoord2d(1.0, 0.0)
    glVertex3d( 1.0,  1.0,  0)
    glTexCoord2d(0.0, 0.0)
    glVertex3d(-1.0,  1.0,  0)
    glEnd()
    glPopMatrix()

    ## Enable / Disable
    glEnable(GL_DEPTH_TEST)     # Enable GL_DEPTH_TEST
    glEnable(GL_LIGHTING)       # Enable Light
    glEnable(GL_LIGHT0)         # Enable Light
    glDisable(GL_TEXTURE_2D)    # Disable texture map

    ## make projection matrix
    f = 1000.0  #far
    n = 1.0     #near

    m1 = np.array([
    [(alpha)/cx, 0, 0, 0],
    [0, beta/cy, 0, 0],
    [0, 0, -(f+n)/(f-n), (-2.0*f*n)/(f-n)],
    [0,0,-1,0],
    ])
    glLoadMatrixd(m1.T)

    ## draw cube
    glMatrixMode(GL_MODELVIEW)
    glLoadIdentity()
    glPushMatrix()  #projection Push(?)

    glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, [0.0,0.0,1.0,1.0])
    if not ids is None:
        # fix axis
        tvec[0][0][0] = tvec[0][0][0]
        tvec[0][0][1] = -tvec[0][0][1]
        tvec[0][0][2] = -tvec[0][0][2]

        rvec[0][0][1] = -rvec[0][0][1]
        rvec[0][0][2] = -rvec[0][0][2]
        m = compositeArray(cv2.Rodrigues(rvec)[0], tvec[0][0])
        glPushMatrix()
        glLoadMatrixd(m.T)

        glTranslatef(0, 0, -0.5)
        glutSolidCube(1.0)
        glPopMatrix()

    glPopMatrix()   #projection POP(?)


    glFlush();
    glutSwapBuffers()


def compositeArray(rvec, tvec):
    v = np.c_[rvec, tvec.T]
    #print(v)
    v_ = np.r_[v, np.array([[0,0,0,1]])]
    return v_

def init():
    #glClearColor(0.7, 0.7, 0.7, 0.7)
    glClearColor(0.0, 0.0, 0.0, 1.0)
    glEnable(GL_DEPTH_TEST)

    glEnable(GL_LIGHTING)
    glEnable(GL_LIGHT0)

def idle():
    glutPostRedisplay()

def reshape(w, h):
    glViewport(0, 0, w, h)
    glLoadIdentity()
    glOrtho(-w / windowWidth, w / windowWidth, -h / windowHeight, h / windowHeight, -1.0, 1.0)

def keyboard(key, x, y):
    # convert byte to str
    key = key.decode('utf-8')
    if key == 'q':
        print('exit')
        sys.exit()

if __name__ == "__main__":
    glutInitWindowPosition(0, 0);
    glutInitWindowSize(windowWidth, windowHeight);
    glutInit(sys.argv)

    glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE, GLUT_ACTION_GLUTMAINLOOP_RETURNS);
    glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE | GLUT_DEPTH)
    glutCreateWindow(b"Display")
    glutDisplayFunc(draw)
    glutReshapeFunc(reshape)
    glutKeyboardFunc(keyboard)
    init()
    glutIdleFunc(idle)

    glutMainLoop()

Projection Matrix referred to here.
My Camera parameter is,

dist.csv:  
-1.034e-02, -5.17e-01, 3.512e-03, -4.767e-03, 1.831e+00

mtx.csv:  
6.677e+02, 0.000e+00, 2.984e+02  
0.000e+00, 6.915e+02, 2.375e+02  
0.000e+00, 0.000e+00, 1.000e+00  

What is wrong?

p.s.
I understood my projection matrix is wrong.
So, I change the projection matrix but cube still exist wrong position. ;(
I guess that I need some of coefficient to tvec, what do you think? The error video is here.

1 Answers1

1

Your matrix calculation is mathematically correct, but practically wrong.

See OpenGL 2.0 API Specification; 2.11. COORDINATE TRANSFORMATIONS; page 43

LoadMatrix takes a pointer to a 4 × 4 matrix stored in column-major order as 16 consecutive floating-point values, i.e. as

[ a1 a5 a9 a13
  a2 a6 a10 a14
  a3 a7 a11 a15
  a4 a8 a12 a16 ]

(This differs from the standard row-major C ordering for matrix elements. If the standard ordering is used, all of the subsequent transformation equations are transposed, and the columns representing vectors become rows.)

This means the perspective projection Matrix has to be set like this:

r = right, l = left, b = bottom, t = top, n = near, f = far

2*n/(r-l)      0              0                0
0              2*n/(t-b)      0                0
(r+l)/(r-l)    (t+b)/(t-b)    -(f+n)/(f-n)    -1    
0              0              -2*f*n/(f-n)     0


Transpose the projection matrix to solve the issue:

m1 = np.array([
    [(alpha)/cx, 0,       0,                0 ],
    [0,          beta/cy, 0,                0 ],
    [0,          0,       -(f+n)/(f-n),     -1],
    [0,          0,       (-2.0*f*n)/(f-n), 0 ],
])
glLoadMatrixd(m1.T)

Alternatively glLoadTransposeMatrix can be used insted of glLoadMatrix. glLoadTransposeMatrix replace the current matrix with the specified row-major ordered matrix:

m1 = np.array([
    [(alpha)/cx, 0,       0,            0               ],
    [0,          beta/cy, 0,            0               ],
    [0,          0,       -(f+n)/(f-n), (-2.0*f*n)/(f-n)],
    [0,          0,       -1,           0               ],
])
glLoadTransposeMatrixd(m1.T)


The same fault is present, when you set up the model view matrix. Change your code like this:

m = compositeArray(cv2.Rodrigues(rvec)[0], tvec[0][0])
.....
glLoadTransposeMatrixd(m.T)
Rabbid76
  • 202,892
  • 27
  • 131
  • 174