import cv2
import numpy as np
import os
import pyvista as pv
# Input directory containing images
input_dir = "C:/Users/Pictures/FYP/Box2"
# Get a list of all image file paths in the directory
image_paths = [os.path.join(input_dir, f) for f in sorted(os.listdir(input_dir)) if f.endswith(".jpg")]
# Camera calibration matrix
K = np.array([[800, 0, 960],
              [0, 800, 540],
              [0, 0, 1]], dtype=np.float64)
# Feature detector and descriptor
detector = cv2.SIFT_create()
# Create FLANN-based matcher
FLANN_INDEX_KDTREE = 1  # KD-tree index, appropriate for SIFT's float descriptors
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
matcher = cv2.FlannBasedMatcher(index_params, search_params)
# Find feature matches between image pairs
def match_images(img1, img2):
    kp1, des1 = detector.detectAndCompute(img1, None)
    kp2, des2 = detector.detectAndCompute(img2, None)
    matches = matcher.knnMatch(des1, des2, k=2)
    # Apply ratio test to filter good matches
    good_matches = []
    for m, n in matches:
        if m.distance < 0.7 * n.distance:
            good_matches.append(m)
    return kp1, kp2, good_matches
# Estimate camera poses from feature matches
def estimate_pose(kp1, kp2, matches):
    points1 = np.array([kp1[m.queryIdx].pt for m in matches])
    points2 = np.array([kp2[m.trainIdx].pt for m in matches])
    E, mask = cv2.findEssentialMat(points1, points2, K)
    _, R, t, mask = cv2.recoverPose(E, points1, points2, K)
    return R, t
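# Note: recoverPose also returns an inlier mask, which is currently discarded.
# Keeping only the matches flagged as inliers (mask.ravel() > 0) before
# triangulating would probably remove many outlier correspondences.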
# Triangulate 3D points from camera poses and feature matches
def triangulate_points(kp1, kp2, matches, R1, t1, R2, t2):
    points1 = np.array([kp1[m.queryIdx].pt for m in matches])
    points2 = np.array([kp2[m.trainIdx].pt for m in matches])
    proj1 = K.dot(np.hstack((R1, t1)))
    proj2 = K.dot(np.hstack((R2, t2)))
    points4D = cv2.triangulatePoints(proj1, proj2, points1.T, points2.T)
    points3D = cv2.convertPointsFromHomogeneous(points4D.T)
    return points3D.squeeze()
# Resize image while maintaining aspect ratio
def resize_image(img, max_dim):
    height, width = img.shape[:2]
    if height > width:
        new_height = max_dim
        new_width = int(width * (max_dim / height))
    else:
        new_width = max_dim
        new_height = int(height * (max_dim / width))
    resized_img = cv2.resize(img, (new_width, new_height))
    return resized_img
# Perform SFM pipeline
image_points = []
camera_poses = []
for i in range(len(image_paths)):
    # Load image
    img = cv2.imread(image_paths[i])
    # Resize image to reduce processing time
    max_dim = 1400
    resized_img = resize_image(img, max_dim)
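    # Note (my assumption): K above was built for the original capture resolution
    # (the principal point 960, 540 suggests 1920x1080 images). Because the image
    # is resized here, I suspect the intrinsics should be scaled by the same
    # factor before pose estimation, e.g.:
    #   scale = resized_img.shape[1] / img.shape[1]
    #   K_scaled = K.copy()
    #   K_scaled[:2, :] *= scale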
    # Extract features and match with previous image
    if i == 0:
        image_points.append(None)
        camera_poses.append(np.eye(3, 4))
    else:
        kp1, kp2, matches = match_images(prev_img, resized_img)
        R, t = estimate_pose(kp1, kp2, matches)
        camera_poses.append(np.hstack((R, t)))
        points3D = triangulate_points(kp1, kp2, matches, np.eye(3), np.zeros((3, 1)), R, t)
        image_points.append(points3D)
    # Store current image for the next iteration
    prev_img = resized_img
# Flatten image_points into a 2D array
data = np.concatenate(image_points[1:], axis=0)
# Save the array to a CSV file
np.savetxt("Box_2.csv", data, delimiter=",", header="", comments="")
# Visualization using Pyvista library mesh approach
points = np.genfromtxt('Box_2.csv', delimiter=",", dtype=np.float32)
point_cloud = pv.PolyData(points)
mesh = point_cloud.reconstruct_surface()
mesh.save('mesh_Box_2.stl')
mesh.plot(color='red')
I'm trying to use the OpenCV library to build a structure-from-motion (SfM) pipeline that reconstructs a 3D model from multi-view 2D images of an object, and then visualize the 3D model as a mesh with the PyVista library in a Jupyter notebook. However, there seem to be mistakes somewhere in the pipeline, because the 3D point cloud it produces is poor. The mesh I generate from the point cloud looks nothing like the object I photographed: the object is simply a box, but the reconstructed mesh does not resemble a box at all. Note that I took multiple images of the box from multiple viewpoints.
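To narrow down whether the problem is in the point cloud itself or only in the surface-reconstruction step, I think something like the following (an untested sketch that assumes the Box_2.csv file produced above) should plot the raw triangulated points directly:

import numpy as np
import pyvista as pv

# Untested sketch: view the raw triangulated points before any surface
# reconstruction, to check whether the point cloud itself looks box-like.
raw_points = np.genfromtxt("Box_2.csv", delimiter=",", dtype=np.float32)
pv.PolyData(raw_points).plot(render_points_as_spheres=True, point_size=5)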
I'm aware that the camera calibration matrix (K) needs some modification, but I'm more worried that the pipeline or its functions are not coded correctly, especially the camera pose estimation function or the triangulation function, since there is very little open-source Python code I can refer to for building this kind of pipeline.
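For what it's worth, one thing I suspect is that estimate_pose only returns R and t relative to the previous camera, while triangulate_points is always called with the identity pose, so every image pair is triangulated in its own local coordinate frame. Below is a rough, untested sketch of how I think the relative poses could be chained into one global frame before triangulating (relative_poses, R_global, t_global and global_poses are just placeholder names I made up, not part of my pipeline):

import numpy as np

# Rough, untested sketch: chain the pair-wise relative poses from recoverPose
# into one global coordinate frame before triangulating.
# 'relative_poses' is a hypothetical list of (R, t) tuples, one per consecutive
# image pair, as returned by estimate_pose().
relative_poses = []  # hypothetical placeholder, filled pair by pair in the loop

R_global = np.eye(3)          # pose of the first camera: identity
t_global = np.zeros((3, 1))
global_poses = [np.hstack((R_global, t_global))]

for R_rel, t_rel in relative_poses:
    # recoverPose convention: x_curr = R_rel @ x_prev + t_rel, so the absolute
    # pose of the current camera composes as below.
    R_global = R_rel @ R_global
    t_global = R_rel @ t_global + t_rel
    global_poses.append(np.hstack((R_global, t_global)))

# Triangulation would then use consecutive absolute poses, e.g. for pair i:
# triangulate_points(kp1, kp2, matches,
#                    global_poses[i - 1][:, :3], global_poses[i - 1][:, 3:],
#                    global_poses[i][:, :3], global_poses[i][:, 3:])

I also realize the translation returned by recoverPose is only defined up to scale for each pair, so even this chaining is probably not enough without propagating a consistent scale (or running bundle adjustment), but I'd like to know whether my understanding is on the right track.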
If anyone highly experienced with this kind of project could check whether the pipeline is put together correctly, I would greatly appreciate it!