import cv2
import numpy as np
import os
import pyvista as pv
# Input directory containing images
input_dir = "C:/Users/Pictures/FYP/Box2"
# Get a list of all image file paths in the directory
image_paths = [os.path.join(input_dir, f) for f in sorted(os.listdir(input_dir)) if f.endswith(".jpg")]
# Camera calibration matrix
K = np.array([[800, 0, 960],
              [0, 800, 540],
              [0, 0, 1]], dtype=np.float64)
# Feature detector and descriptor
detector = cv2.SIFT_create()
# Create FLANN-based matcher
FLANN_INDEX_KDTREE = 1  # KD-tree index, appropriate for SIFT's float descriptors
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
matcher = cv2.FlannBasedMatcher(index_params, search_params)
# Find feature matches between image pairs
def match_images(img1, img2):
    kp1, des1 = detector.detectAndCompute(img1, None)
    kp2, des2 = detector.detectAndCompute(img2, None)
    matches = matcher.knnMatch(des1, des2, k=2)
    # Apply ratio test to filter good matches
    good_matches = []
    for m, n in matches:
        if m.distance < 0.7 * n.distance:
            good_matches.append(m)
    return kp1, kp2, good_matches
# Estimate camera poses from feature matches
def estimate_pose(kp1, kp2, matches):
    points1 = np.array([kp1[m.queryIdx].pt for m in matches])
    points2 = np.array([kp2[m.trainIdx].pt for m in matches])
    E, mask = cv2.findEssentialMat(points1, points2, K)
    _, R, t, mask = cv2.recoverPose(E, points1, points2, K)
    return R, t
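# Note: recoverPose also returns an inlier mask, which is currently discarded.
# Keeping only the matches flagged as inliers (mask.ravel() > 0) before
# triangulating would probably remove many outlier correspondences.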
# Triangulate 3D points from camera poses and feature matches
def triangulate_points(kp1, kp2, matches, R1, t1, R2, t2):
    points1 = np.array([kp1[m.queryIdx].pt for m in matches])
    points2 = np.array([kp2[m.trainIdx].pt for m in matches])
    proj1 = K.dot(np.hstack((R1, t1)))
    proj2 = K.dot(np.hstack((R2, t2)))
    points4D = cv2.triangulatePoints(proj1, proj2, points1.T, points2.T)
    points3D = cv2.convertPointsFromHomogeneous(points4D.T)
    return points3D.squeeze()
# Resize image while maintaining aspect ratio
def resize_image(img, max_dim):
    height, width = img.shape[:2]
    if height > width:
        new_height = max_dim
        new_width = int(width * (max_dim / height))
    else:
        new_width = max_dim
        new_height = int(height * (max_dim / width))
    resized_img = cv2.resize(img, (new_width, new_height))
    return resized_img
# Perform SFM pipeline
image_points = []
camera_poses = []
for i in range(len(image_paths)):
    # Load image
    img = cv2.imread(image_paths[i])
    # Resize image to reduce processing time
    max_dim = 1400
    resized_img = resize_image(img, max_dim)
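    # Note (my assumption): K above was built for the original capture resolution
    # (the principal point 960, 540 suggests 1920x1080 images). Because the image
    # is resized here, I suspect the intrinsics should be scaled by the same
    # factor before pose estimation, e.g.:
    #   scale = resized_img.shape[1] / img.shape[1]
    #   K_scaled = K.copy()
    #   K_scaled[:2, :] *= scale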
    # Extract features and match with previous image
    if i == 0:
        image_points.append(None)
        camera_poses.append(np.eye(3, 4))
    else:
        kp1, kp2, matches = match_images(prev_img, resized_img)
        R, t = estimate_pose(kp1, kp2, matches)
        camera_poses.append(np.hstack((R, t)))
        points3D = triangulate_points(kp1, kp2, matches, np.eye(3), np.zeros((3, 1)), R, t)
        image_points.append(points3D)
    # Store current image for the next iteration
    prev_img = resized_img
# Flatten image_points into a 2D array
data = np.concatenate(image_points[1:], axis=0)
# Save the array to a CSV file
np.savetxt("Box_2.csv", data, delimiter=",", header="", comments="")
# Visualization using Pyvista library mesh approach
points = np.genfromtxt('Box_2.csv', delimiter=",", dtype=np.float32)
point_cloud = pv.PolyData(points)
mesh = point_cloud.reconstruct_surface()
mesh.save('mesh_Box_2.stl')
mesh.plot(color='red')
I'm trying to use the OpenCV library to build a structure-from-motion (SfM) pipeline that reconstructs a 3D model from multi-view 2D images of an object, and then visualize the 3D model as a mesh with the PyVista library in a Jupyter notebook. However, there seem to be mistakes somewhere in the pipeline, because the 3D point cloud it produces is poor. The mesh I generate from the point cloud looks nothing like the object I photographed: the object is simply a box, but the reconstructed mesh does not resemble a box at all. Note that I took multiple images of the box from multiple viewpoints.
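To narrow down whether the problem is in the point cloud itself or only in the surface-reconstruction step, I think something like the following (an untested sketch that assumes the Box_2.csv file produced above) should plot the raw triangulated points directly:

import numpy as np
import pyvista as pv

# Untested sketch: view the raw triangulated points before any surface
# reconstruction, to check whether the point cloud itself looks box-like.
raw_points = np.genfromtxt("Box_2.csv", delimiter=",", dtype=np.float32)
pv.PolyData(raw_points).plot(render_points_as_spheres=True, point_size=5)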
I'm aware that the camera calibration matrix (K) needs some modification, but I'm more worried that the pipeline or its functions are not coded correctly, especially the camera pose estimation function or the triangulation function, since there is very little open-source Python code I can refer to for building this kind of pipeline.
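For what it's worth, one thing I suspect is that estimate_pose only returns R and t relative to the previous camera, while triangulate_points is always called with the identity pose, so every image pair is triangulated in its own local coordinate frame. Below is a rough, untested sketch of how I think the relative poses could be chained into one global frame before triangulating (relative_poses, R_global, t_global and global_poses are just placeholder names I made up, not part of my pipeline):

import numpy as np

# Rough, untested sketch: chain the pair-wise relative poses from recoverPose
# into one global coordinate frame before triangulating.
# 'relative_poses' is a hypothetical list of (R, t) tuples, one per consecutive
# image pair, as returned by estimate_pose().
relative_poses = []  # hypothetical placeholder, filled pair by pair in the loop

R_global = np.eye(3)          # pose of the first camera: identity
t_global = np.zeros((3, 1))
global_poses = [np.hstack((R_global, t_global))]

for R_rel, t_rel in relative_poses:
    # recoverPose convention: x_curr = R_rel @ x_prev + t_rel, so the absolute
    # pose of the current camera composes as below.
    R_global = R_rel @ R_global
    t_global = R_rel @ t_global + t_rel
    global_poses.append(np.hstack((R_global, t_global)))

# Triangulation would then use consecutive absolute poses, e.g. for pair i:
# triangulate_points(kp1, kp2, matches,
#                    global_poses[i - 1][:, :3], global_poses[i - 1][:, 3:],
#                    global_poses[i][:, :3], global_poses[i][:, 3:])

I also realize the translation returned by recoverPose is only defined up to scale for each pair, so even this chaining is probably not enough without propagating a consistent scale (or running bundle adjustment), but I'd like to know whether my understanding is on the right track.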
If anyone highly experienced with this kind of project could check whether the pipeline is put together correctly, I would greatly appreciate it!