0

I would like to perform data augmentation on an image, changing its perspective.

I have tried using albumentations.augmentations.geometric.transforms.Perspective, but it is very limited: the corner points are chosen randomly and no constraints can be set on them.

In particular, I would like all the augmented images to have roughly the same top-down perspective (i.e. as if seen from a security camera).

As an alternative I have built my own rotation matrices; however, I have difficulty with the perspective projection of the 3D points into 2D. This is the code to transform a set of 4 source points into a set of 4 destination points, but it produces weird results:

def perspective_projection_matrix(fov, aspect, near, far):
    """Build a 4x4 perspective projection matrix.

    The layout is the one produced by ``gluPerspective``: multiplying it
    with a homogeneous column vector ``(x, y, z, 1)`` yields clip
    coordinates whose ``w`` component is ``-z``.

    Args:
        fov: Vertical field of view, in degrees.
        aspect: Viewport aspect ratio (width / height).
        near: Distance to the near clipping plane.
        far: Distance to the far clipping plane.

    Returns:
        A ``(4, 4)`` ``numpy.ndarray`` of floats.

    Raises:
        ZeroDivisionError: For plain Python numbers when ``aspect`` is 0,
            ``near == far``, or ``fov`` is 0 (the tangent is then 0).
    """
    # Cotangent of half the field of view: the focal-length scale factor.
    focal = 1 / math.tan(math.radians(fov * 0.5))
    depth_range = near - far

    x_scale = focal / aspect
    z_scale = (far + near) / depth_range
    z_offset = (2 * near * far) / depth_range

    return np.array([
        [x_scale, 0, 0, 0],
        [0, focal, 0, 0],
        [0, 0, z_scale, z_offset],
        [0, 0, -1, 0],  # copies -z into w for the perspective divide
    ])


def transform_points(points, angles, *, translation=(0, 0, 10), fov=60,
                     aspect=1, near=0.1, far=10000):
    """Rotate 3D points around the origin and project them in perspective.

    Each point is first shifted by ``-translation``, then rotated about the
    Y, Z and X axes (in that order), multiplied by the matrix returned from
    ``perspective_projection_matrix`` and finally divided by its homogeneous
    ``w`` component.

    Args:
        points: Sequence of 3D points, i.e. anything convertible to an
            ``(N, 3)`` array. The original use case passes the 4 corners
            of an image, but any number of points is accepted.
        angles: ``(h, k, l)`` rotation angles in degrees about the X, Y
            and Z axes respectively.
        translation: Vector subtracted from every point before rotating.
        fov: Vertical field of view in degrees.
        aspect: Viewport aspect ratio (width / height).
        near: Distance to the near clipping plane.
        far: Distance to the far clipping plane.

    Returns:
        A tuple with one length-3 ``numpy.ndarray`` per input point holding
        the projected ``(x, y, z)`` after the perspective divide.

    Raises:
        ValueError: If ``points`` is not a sequence of 3D points or
            ``angles`` does not contain exactly three values.
    """
    pts = np.asarray(points, dtype=float)
    if pts.ndim != 2 or pts.shape[1] != 3:
        raise ValueError(
            f"points must have shape (N, 3), got {pts.shape}"
        )

    angle_x, angle_y, angle_z = np.radians(np.asarray(angles, dtype=float))
    cos_x, sin_x = np.cos(angle_x), np.sin(angle_x)
    cos_y, sin_y = np.cos(angle_y), np.sin(angle_y)
    cos_z, sin_z = np.cos(angle_z), np.sin(angle_z)

    # NOTE: rot_x has the opposite sign convention to rot_y / rot_z (it is
    # the transpose of the usual right-handed X rotation). Kept as in the
    # original code; flip the sine signs if a consistent handedness is
    # wanted.
    rot_x = np.array([[1, 0, 0],
                      [0, cos_x, sin_x],
                      [0, -sin_x, cos_x]])
    rot_y = np.array([[cos_y, 0, sin_y],
                      [0, 1, 0],
                      [-sin_y, 0, cos_y]])
    rot_z = np.array([[cos_z, -sin_z, 0],
                      [sin_z, cos_z, 0],
                      [0, 0, 1]])

    # Y is applied first, then Z, then X, so the composed matrix reads
    # right to left.
    rotation = rot_x @ rot_z @ rot_y

    # Points are stored as rows, hence the transposed matrices below.
    # The rotation happens after the shift, i.e. about the origin of the
    # shifted frame rather than about the centre of the point set.
    rotated = (pts - np.asarray(translation, dtype=float)) @ rotation.T

    projection = perspective_projection_matrix(fov, aspect, near, far)
    homogeneous = np.hstack([rotated, np.ones((len(rotated), 1))])
    clip = homogeneous @ projection.T

    # Perspective divide; the small epsilon is presumably there to avoid a
    # division by zero when w is exactly 0.
    projected = clip[:, :3] / (clip[:, 3:4] + 1e-6)

    return tuple(projected)
Userulli
  • 244
  • 1
  • 7

0 Answers0