I would like to perform data augmentation on an image, changing its perspective.
I have tried using albumentations.augmentations.geometric.transforms.Perspective
but it is really very limited. In fact the points are chosen randomly and no constraints can be set.
In particular, I would like all the augmented images to share roughly the same top-down perspective (i.e. as if seen from a security camera).
As an alternative I have built my own rotation matrices; however, I am having difficulty with the perspective projection of the 3D points into 2D. This is the code that transforms a set of 4 source points into a set of 4 destination points, but it produces weird results:
def perspective_projection_matrix(fov, aspect, near, far):
    """Return a 4x4 perspective projection matrix.

    The layout matches the classic ``gluPerspective`` matrix: x and y are
    scaled by the cotangent of half the field of view (x additionally
    divided by ``aspect``), z is remapped using the near/far planes, and
    the last row copies ``-z`` into the homogeneous ``w`` component.

    Args:
        fov: Field of view in degrees (converted with ``np.radians``).
        aspect: Divisor applied to the x scale relative to the y scale.
        near: Distance to the near clipping plane.
        far: Distance to the far clipping plane.

    Returns:
        A ``numpy.ndarray`` of shape ``(4, 4)``.
    """
    half_angle = np.radians(fov * 0.5)
    focal = 1 / math.tan(half_angle)

    # Both depth terms share the same (near - far) denominator.
    depth_span = near - far
    depth_scale = (far + near) / depth_span
    depth_offset = (2 * near * far) / depth_span

    projection = np.array([
        [focal / aspect, 0, 0, 0],
        [0, focal, 0, 0],
        [0, 0, depth_scale, depth_offset],
        [0, 0, -1, 0],
    ])
    return projection
def _rotation_from_angles(angles):
    """Build the combined rotation ``Rx @ Rz @ Ry`` from ``(h, k, l)`` in degrees."""
    x_rad, y_rad, z_rad = np.radians(angles)
    cos_x, sin_x = np.cos(x_rad), np.sin(x_rad)
    cos_y, sin_y = np.cos(y_rad), np.sin(y_rad)
    cos_z, sin_z = np.cos(z_rad), np.sin(z_rad)

    # NOTE(review): rot_x places +sin above the diagonal, i.e. the opposite
    # sign convention to rot_y / rot_z below. Kept as-is so existing callers
    # get the same results -- confirm this is intended.
    rot_x = np.array([[1, 0, 0],
                      [0, cos_x, sin_x],
                      [0, -sin_x, cos_x]])
    rot_y = np.array([[cos_y, 0, sin_y],
                      [0, 1, 0],
                      [-sin_y, 0, cos_y]])
    rot_z = np.array([[cos_z, -sin_z, 0],
                      [sin_z, cos_z, 0],
                      [0, 0, 1]])

    # Application order on a column vector: Ry first, then Rz, then Rx.
    return rot_x @ rot_z @ rot_y


def transform_points(points, angles, *, translation=(0, 0, 10), fov=60,
                     aspect=1, near=0.1, far=10000):
    """Translate, rotate and perspective-project a set of 3D points.

    Each point is shifted by ``-translation``, rotated about the y, z and
    x axes (in that order), multiplied by the matrix returned by
    ``perspective_projection_matrix`` and finally divided by its
    homogeneous ``w`` component.

    The returned coordinates are the result of the perspective divide
    (normalized device coordinates), not pixel coordinates. If they are
    meant to be used as image-space destination points they presumably
    still need to be mapped to the image size -- verify against the caller.

    Args:
        points: Sequence of N points, each with 3 components (x, y, z).
        angles: ``(h, k, l)`` rotation angles in degrees about the x, y
            and z axes respectively.
        translation: Vector subtracted from every point before rotating.
        fov: Field of view in degrees, forwarded to the projection matrix.
        aspect: Aspect value forwarded to the projection matrix.
        near: Near-plane distance forwarded to the projection matrix.
        far: Far-plane distance forwarded to the projection matrix.

    Returns:
        A tuple of N arrays of shape ``(3,)``, one per input point, in
        input order.

    Raises:
        ValueError: If ``points`` cannot be viewed as an ``(N, 3)`` array.
    """
    pts = np.asarray(points, dtype=float)
    if pts.ndim != 2 or pts.shape[1] != 3:
        raise ValueError(
            f"points must have shape (N, 3), got {pts.shape}"
        )

    offset = np.asarray(translation, dtype=float)
    rotation = _rotation_from_angles(angles)
    # Row-vector form of ``rotation @ p`` applied to every point at once.
    rotated = (pts - offset) @ rotation.T

    projection = perspective_projection_matrix(fov, aspect, near, far)
    homogeneous = np.hstack([rotated, np.ones((pts.shape[0], 1))])
    clip = homogeneous @ projection.T

    # Same small offset as before, so that w == 0 does not divide by zero.
    projected = clip[:, :3] / (clip[:, 3:4] + 1e-6)
    return tuple(projected)