%matplotlib inline
import itertools
import operator
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import utils
from PIL import Image
np.set_printoptions(precision=2, suppress=True)
def calculate_camera_from_correspondences(space_pts: np.ndarray, img_pts: np.ndarray) -> np.ndarray:
assert space_pts.shape[-1] == 3
assert img_pts.shape[-1] == 2
## Normalization of `space_pts`
t = space_pts.mean(axis=0, keepdims=True)
r_bar = np.linalg.norm(space_pts - t, axis=-1).mean()
s = (3 ** 0.5) / r_bar
U = np.diag([s, s, s, 1])
U[:3, 3] = -s * t
## Normalization of `img_pts`
t_prime = img_pts.mean(axis=0, keepdims=True)
r_prime_bar = np.linalg.norm(img_pts - t_prime, axis=-1).mean()
s_prime = (2 ** 0.5) / r_prime_bar
T = np.diag([s_prime, s_prime, 1])
T[:2, 2] = -s_prime * t_prime
## DLT
src_homogenized = np.append(space_pts, np.ones((len(space_pts), 1)), axis=1)
src_prime = src_homogenized @ U.T
src_prime_homogenized = src_prime / src_prime[:, 3, np.newaxis]
tgt_homogenized = np.append(img_pts, np.ones((len(img_pts), 1)), axis=1)
tgt_prime = tgt_homogenized @ T.T
tgt_prime_homogenized = tgt_prime / tgt_prime[:, 2, np.newaxis]
# Create the constraints
A = np.zeros((2 * len(space_pts), 3 * src_prime_homogenized.shape[-1]))
for i, (src_prime_i, tgt_prime_i) in enumerate(zip(src_prime_homogenized, tgt_prime_homogenized)):
A[2 * i, 4:8] = -tgt_prime_i[2] * src_prime_i
A[2 * i, 8:12] = tgt_prime_i[1] * src_prime_i
A[(2 * i) + 1, 0:4] = tgt_prime_i[2] * src_prime_i
A[(2 * i) + 1, 8:12] = -tgt_prime_i[0] * src_prime_i
# Get h
_, _, Vt = np.linalg.svd(A)
p = Vt[-1]
P_tilde = p.reshape(tgt_prime_homogenized.shape[-1], src_prime_homogenized.shape[-1])
## Denormalization
P = np.linalg.inv(T) @ P_tilde @ U
return P
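As a quick sanity check on the recovered camera (not part of the assignment pipeline), we can reproject the input correspondences and look at the mean pixel error; a minimal sketch using only the arrays already passed to the function above:
def reprojection_error(P: np.ndarray, space_pts: np.ndarray, img_pts: np.ndarray) -> float:
    # Project the annotated 3D points with P and compare against the annotated 2D points.
    space_homogenized = np.append(space_pts, np.ones((len(space_pts), 1)), axis=1)
    projected = space_homogenized @ P.T
    projected = projected[:, :2] / projected[:, 2, np.newaxis]
    # Mean Euclidean distance in pixels; should be small for good annotations.
    return float(np.linalg.norm(projected - img_pts, axis=1).mean())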
def q1a():
raw_anns = pd.read_csv("data/q1/bunny.txt", sep=" ", header=None).to_numpy()
space_pts = raw_anns[:, 2:]
img_pts = raw_anns[:, :2]
P = calculate_camera_from_correspondences(space_pts, img_pts)
bunny_pts = np.load("data/q1/bunny_pts.npy")
bunny_pts_projected = np.squeeze(cv2.perspectiveTransform(bunny_pts[:, np.newaxis], P))
img = np.array(Image.open("data/q1/bunny.jpeg"))
projected_bunny_img = img.copy()
for bunny_pt_projected in bunny_pts_projected:
projected_bunny_img = cv2.circle(projected_bunny_img, bunny_pt_projected.astype(int), radius=3, color=(255, 0, 0), thickness=-1)
plt.title("Surface Points")
plt.axis("off")
plt.imshow(projected_bunny_img)
plt.show()
bbox_pts = np.load("data/q1/bunny_bd.npy").reshape(-1, 3)
bbox_pts_projected = np.squeeze(cv2.perspectiveTransform(bbox_pts[:, np.newaxis], P))
bbox_img = img.copy()
for i in range(0, bbox_pts_projected.shape[0], 2):
bbox_img = cv2.line(bbox_img, bbox_pts_projected[i].astype(int), bbox_pts_projected[i + 1].astype(int), color=(0, 0, 255), thickness=6)
plt.title("Bounding Box")
plt.axis("off")
plt.imshow(bbox_img)
plt.show()
q1a()
(b) Cuboid
def q1b():
space_pts = np.array([[-1, 1, 1],
[-1, 1, -1],
[1, 1, -1],
[1, -1, -1],
[-1, -1, -1],
[-1, -1, 1],
[1, -1, 1],
[1, 1, 1]], dtype=float)
img_pts = np.load("data/q1/q1b.npy")
P = calculate_camera_from_correspondences(space_pts[:6], img_pts)
img = np.array(Image.open("data/q1b.webp"))
plt.title("Input Image")
plt.axis("off")
plt.imshow(img)
plt.show()
annotated_img = img.copy()
for i, pt in enumerate(img_pts):
annotated_img = cv2.circle(annotated_img, pt.astype(int), 10, (0, 255, 0), -1)
annotated_img = cv2.putText(annotated_img, str(i), pt.astype(int), cv2.FONT_HERSHEY_DUPLEX, 3, (0, 255, 0), 3, cv2.LINE_AA)
plt.title("Annotated 2D Points")
plt.axis("off")
plt.imshow(annotated_img)
plt.show()
cube_pts_projected = np.squeeze(cv2.perspectiveTransform(space_pts[:, np.newaxis], P))
cube_edges_img = img.copy()
cube_edges = cube_pts_projected[[0, 1, 0, 5, 0, 7, 1, 2, 1, 4, 2, 7, 2, 3, 3, 6, 3, 4, 4, 5, 5, 6, 6, 7]]
for i in range(0, cube_edges.shape[0], 2):
cube_edges_img = cv2.line(cube_edges_img, cube_edges[i].astype(int), cube_edges[i + 1].astype(int), color=(0, 0, 255), thickness=6)
plt.title("Cube Edges")
plt.axis("off")
plt.imshow(cube_edges_img)
plt.show()
q1b()
def find_vanishing_points(points: np.ndarray) -> np.ndarray:
lines = utils.points_to_lines(points)
v = utils.lines_to_points(lines)
return v
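`utils.points_to_lines` and `utils.lines_to_points` are provided helpers whose source is not shown here. A minimal sketch of what they are assumed to do (hypothetical names; the real utils may differ): consecutive point pairs define homogeneous lines via cross products, and consecutive line pairs intersect at a point, which is the vanishing point when the lines are parallel in the scene.
def points_to_lines_sketch(points: np.ndarray) -> np.ndarray:
    # Assumed behavior: points come in pairs (p0, p1), (p2, p3), ...; each pair defines the line l = p x q.
    homogenized = np.append(points, np.ones((len(points), 1)), axis=1)
    return np.cross(homogenized[0::2], homogenized[1::2])

def lines_to_points_sketch(lines: np.ndarray) -> np.ndarray:
    # Assumed behavior: lines come in pairs (l0, l1), (l2, l3), ...; each pair intersects at v = l0 x l1.
    return np.cross(lines[0::2], lines[1::2])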
(a) Camera calibration from vanishing points
def calculate_intrinsics_from_vanishing_points(par_lines: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
assert par_lines.shape[-2:] in {(2, 4), (4, 2)}
# Reshaped
reshaped_pts = par_lines.reshape(-1, 2)
v = find_vanishing_points(reshaped_pts)
all_possible_pairs = list(itertools.combinations(v, 2))
A = np.zeros((len(all_possible_pairs), 4))
for i, (v1, v2) in enumerate(all_possible_pairs):
outer = np.outer(v1, v2)
A[i, 0] = np.diag(outer)[:2].sum()
A[i, 3] = outer[-1, -1]
symmetrized = outer + outer.T
A[i, 1:3] = symmetrized[:2, 2]
_, _, Vt = np.linalg.svd(A)
x = Vt[-1]
assert np.allclose(A @ x, 0)
scale_corrected_x = x / x[-1]
a, b, c, d = scale_corrected_x
omega = np.diag([a, a, d])
omega[:2, 2] = omega[2, :2] = [b, c]
L = np.linalg.cholesky(omega)
K = np.linalg.inv(L.T)
K /= K[-1, -1]
return K, v
def q2a():
img = np.array(Image.open("data/q2a.png"))
par_lines = np.load("data/q2/q2a.npy")
K, v = calculate_intrinsics_from_vanishing_points(par_lines)
v = v[:, :2]
principal_pt = K[:2, 2]
annotated_img = img.copy()
COLORS = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]
for COLOR, lines in zip(COLORS, par_lines):
for x1, y1, x2, y2 in lines:
annotated_img = cv2.circle(annotated_img, (x1, y1), 3, COLOR, -1)
annotated_img = cv2.circle(annotated_img, (x2, y2), 3, COLOR, -1)
annotated_img = cv2.line(annotated_img, (x1, y1), (x2, y2), COLOR, 2)
plt.title("Input Image")
plt.axis("off")
plt.imshow(img)
plt.show()
plt.title("Annotated Parallel Lines")
plt.axis("off")
plt.imshow(annotated_img)
plt.show()
x_min = int(v[:, 0].min())
x_max = int(v[:, 0].max())
y_min = int(v[:, 1].min())
y_max = int(v[:, 1].max())
padding = 300
Ht = np.eye(3)
Ht[:2, 2] = [-x_min + padding, -y_min + padding]
translated_img = cv2.warpPerspective(img, Ht, (x_max - x_min + 2 * padding, y_max - y_min + 2 * padding), borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))
v_translated = np.squeeze(cv2.perspectiveTransform(v[:, np.newaxis], Ht))
for i, v_ in enumerate(v_translated):
COLOR = np.random.randint(0, 255, (3,))
translated_img = cv2.circle(translated_img, v_.astype(int), 30, COLOR.tolist(), -1)
translated_img = cv2.line(translated_img, v_.astype(int), v_translated[(i + 1) % v_translated.shape[0]].astype(int), (0, 50, 128), 5)
principal_pt_translated = np.squeeze(cv2.perspectiveTransform(principal_pt[np.newaxis, np.newaxis, :], Ht))
translated_img = cv2.circle(translated_img, principal_pt_translated.astype(int), 20, (255, 0, 0), -1)
plt.title("Vanishing points and principal point")
plt.axis("off")
plt.imshow(translated_img)
plt.show()
print(K)
q2a()
[[1154.18    0.   575.07]
 [   0.   1154.18  431.94]
 [   0.      0.      1.  ]]
3.
In my implementation, I first find the vanishing points $v_1$, $v_2$, and $v_3$ of the three sets of parallel lines with `find_vanishing_points`.
Because we are assuming that the camera has zero skew and that the pixels are square, the image of the absolute conic takes the following form:
$$\omega = \begin{bmatrix} a & 0 & b \\ 0 & a & c \\ b & c & d \end{bmatrix}$$
Since we know that, given vanishing points $v_i$ and $v_j$ of orthogonal directions as well as the image of the absolute conic $\omega$,
$$v_i^\top \omega v_j = 0,$$
we can determine the free variables by fully multiplying out the previous equation. Realistically there are 3 degrees of freedom, but I'm including 4 here and then normalizing the matrix so that $d = 1$. Thus for each pair of vanishing points, we get one linear constraint on the free variables of the form
$$a\,(v_{i,1} v_{j,1} + v_{i,2} v_{j,2}) + b\,(v_{i,1} v_{j,3} + v_{i,3} v_{j,1}) + c\,(v_{i,2} v_{j,3} + v_{i,3} v_{j,2}) + d\, v_{i,3} v_{j,3} = 0.$$
So in my implementation, I stack these constraints into $A$, take the last right singular vector, and rescale it so that $d = 1$ to form $\omega$. Then, taking the Cholesky decomposition $\omega = L L^\top$ and using the fact that $\omega = K^{-\top} K^{-1}$, we get $K = (L^\top)^{-1}$ (normalized so that $K_{3,3} = 1$).
We can then just read the principal point off the last column of $K$.
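As a small standalone check of this recovery step (a sketch with a made-up calibration matrix, not part of the solution): building $\omega = K^{-\top} K^{-1}$ from a known $K$ and running the same Cholesky-based recovery should return the original matrix.
K_true = np.array([[1000., 0., 500.],
                   [0., 1000., 400.],
                   [0., 0., 1.]])
omega_check = np.linalg.inv(K_true).T @ np.linalg.inv(K_true)
# Cholesky gives omega = L L^T with L lower triangular, so K = (L^T)^{-1} up to scale.
L_check = np.linalg.cholesky(omega_check)
K_recovered = np.linalg.inv(L_check.T)
K_recovered /= K_recovered[-1, -1]
assert np.allclose(K_recovered, K_true)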
(b) Camera calibration from metric planes
def calculate_intrinsics_from_metric_planes(squares: np.ndarray) -> np.ndarray:
assert squares.shape[-2:] == (4, 2)
# For each square, compute the homography H that maps its corner points
unit_corners = np.array([[0, 1],
[1, 1],
[1, 0],
[0, 0]], dtype=float)
triu_indices = np.triu_indices(3)
A = np.zeros((2 * squares.shape[0], len(triu_indices[0])))
for i, square in enumerate(squares):
H = utils.calculate_homography_from_correspondences(unit_corners, square)
h1 = H[:, 0]
h2 = H[:, 1]
outer_h1_h2 = np.outer(h1, h2)
symmetrized_h1_h2 = outer_h1_h2 + outer_h1_h2.T
np.fill_diagonal(symmetrized_h1_h2, np.diag(outer_h1_h2))
A[2 * i] = symmetrized_h1_h2[triu_indices]
outer_h1_h1 = np.outer(h1, h1)
symmetrized_h1_h1 = 2 * outer_h1_h1
np.fill_diagonal(symmetrized_h1_h1, np.diag(outer_h1_h1))
outer_h2_h2 = np.outer(h2, h2)
symmetrized_h2_h2 = 2 * outer_h2_h2
np.fill_diagonal(symmetrized_h2_h2, np.diag(outer_h2_h2))
symmetrized_h2_minus_h1 = symmetrized_h2_h2 - symmetrized_h1_h1
A[(2 * i) + 1] = symmetrized_h2_minus_h1[triu_indices]
_, _, Vt = np.linalg.svd(A)
x = Vt[-1]
# assert np.allclose(A @ x, 0)
omega = np.zeros((3, 3), dtype=float)
omega[triu_indices] = x
omega += omega.T
np.fill_diagonal(omega, np.diag(omega / 2))
omega /= omega[-1, -1]
L = np.linalg.cholesky(omega)
K = np.linalg.inv(L.T)
K /= K[-1, -1]
return K
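`utils.calculate_homography_from_correspondences` is another provided helper; a minimal sketch of what it is assumed to do (a plain 4-point DLT without normalization; the real utils implementation may differ):
def homography_from_correspondences_sketch(src: np.ndarray, tgt: np.ndarray) -> np.ndarray:
    # Assumed behavior: DLT on point pairs src_i -> tgt_i, returning H normalized so that H[2, 2] = 1.
    A = []
    for (x, y), (u, v) in zip(src, tgt):
        A.append([x, y, 1, 0, 0, 0, -u * x, -u * y, -u])
        A.append([0, 0, 0, x, y, 1, -v * x, -v * y, -v])
    _, _, Vt = np.linalg.svd(np.array(A, dtype=float))
    H = Vt[-1].reshape(3, 3)
    return H / H[2, 2]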
def calculate_plane_normals(rectangles: np.ndarray, K: np.ndarray) -> np.ndarray:
normals = np.zeros((rectangles.shape[0], 3))
for i, rectangle in enumerate(rectangles):
v_1 = np.squeeze(find_vanishing_points(rectangle))
v_2 = np.squeeze(find_vanishing_points(np.roll(rectangle, 1, axis=0)))
d_1 = np.linalg.inv(K) @ v_1
d_2 = np.linalg.inv(K) @ v_2
normal = np.cross(d_1, d_2)
normal /= np.linalg.norm(normal)
normals[i] = normal
return normals
def q2b():
img = np.array(Image.open("data/q2b.png"))
squares = np.load("data/q2/q2b.npy")
K = calculate_intrinsics_from_metric_planes(squares)
plt.title("Input Image")
plt.axis("off")
plt.imshow(img)
plt.show()
for i, square in enumerate(squares, start=1):
annotated_square_img = cv2.fillPoly(img.copy(), pts=[square.astype(np.int32)], color=(255, 255, 255))
plt.title(f"Annotated Square {i}")
plt.axis("off")
plt.imshow(annotated_square_img)
plt.show()
normals = calculate_plane_normals(squares, K)
for i in range(len(normals) - 1):
normal_i = normals[i]
for j in range(i + 1, len(normals)):
normal_j = normals[j]
cosine = normal_i @ normal_j
angle_radians = np.arccos(cosine)
angle_degrees = angle_radians * 180 / np.pi
print(f"Angle between Plane {i + 1} and Plane {j + 1} (degrees): {angle_degrees:.2f}")
print(K)
q2b()
Angle between Plane 1 and Plane 2 (degrees): 112.72
Angle between Plane 1 and Plane 3 (degrees): 92.20
Angle between Plane 2 and Plane 3 (degrees): 85.29
[[1076.93   -4.53  511.57]
 [   0.   1076.27  395.53]
 [   0.      0.      1.  ]]
4.
In my implementation, for each square I compute the homography $H$ that takes the unit square to the annotated square. If $H = \begin{bmatrix} h_1 & h_2 & h_3 \end{bmatrix}$, then we get two constraints on the image of the absolute conic $\omega$ for each square, namely that
$$h_1^\top \omega h_2 = 0 \qquad \text{and} \qquad h_1^\top \omega h_1 = h_2^\top \omega h_2.$$
If $\omega$ is in the form
$$\omega = \begin{bmatrix} \omega_1 & \omega_2 & \omega_3 \\ \omega_2 & \omega_4 & \omega_5 \\ \omega_3 & \omega_5 & \omega_6 \end{bmatrix},$$
then for each square, the constraints are in the form $v_{ij}^\top \mathbf{x} = 0$ with $\mathbf{x} = (\omega_1, \dots, \omega_6)^\top$, where
$$v_{ij} = \begin{bmatrix} h_{i,1} h_{j,1} \\ h_{i,1} h_{j,2} + h_{i,2} h_{j,1} \\ h_{i,1} h_{j,3} + h_{i,3} h_{j,1} \\ h_{i,2} h_{j,2} \\ h_{i,2} h_{j,3} + h_{i,3} h_{j,2} \\ h_{i,3} h_{j,3} \end{bmatrix},$$
so each square contributes the rows $v_{12}^\top$ and $(v_{22} - v_{11})^\top$ of $A$.
Given there are 5 degrees of freedom and we get 2 constraints for each of the 3 squares, we can determine all of the free variables. The last right singular vector of $A$ is taken again, $\omega$ is formed, the Cholesky decomposition $\omega = L L^\top$ is done, and then we determine $K = (L^\top)^{-1}$ like in Q2a.
The normals of each of the planes can be determined by the relationship $d \propto K^{-1} v$ between a vanishing point $v$ and its 3D direction $d$; the plane normal is then $n = d_1 \times d_2$, where $d_1$ and $d_2$ come from the vanishing points of the two pairs of opposite sides.
We can normalize the normals and then take the arc cosine of $n_i^\top n_j$ to get the angle between planes.
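To convince yourself that the row construction matches the bilinear form (a standalone check with random values, not part of the solution), the dot product of a constructed row with the stacked upper-triangular entries of any symmetric matrix should equal $h_1^\top \omega h_2$:
rng = np.random.default_rng(0)
H_check = rng.normal(size=(3, 3))
omega_rand = rng.normal(size=(3, 3))
omega_rand = omega_rand + omega_rand.T  # any symmetric matrix works for this identity
h1, h2 = H_check[:, 0], H_check[:, 1]
row = np.outer(h1, h2) + np.outer(h1, h2).T
np.fill_diagonal(row, np.diag(np.outer(h1, h2)))
triu = np.triu_indices(3)
assert np.isclose(row[triu] @ omega_rand[triu], h1 @ omega_rand @ h2)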
(c) Camera calibration from rectangles with known sizes
def calculate_intrinsics_from_rectangles(rectangles: np.ndarray, aspect_ratios: np.ndarray) -> np.ndarray:
assert rectangles.shape[-2:] == (4, 2)
# For each rectangle, compute the homography H that maps its corner points
rectangle_corners = []
for aspect_ratio in aspect_ratios:
corners = np.array([[0, aspect_ratio],
[1, aspect_ratio],
[1, 0],
[0, 0]])
rectangle_corners.append(corners)
triu_indices = np.triu_indices(3)
A = np.zeros((2 * rectangles.shape[0], len(triu_indices[0])))
for i, (rectangle, corners) in enumerate(zip(rectangles, rectangle_corners)):
H = utils.calculate_homography_from_correspondences(corners, rectangle)
h1 = H[:, 0]
h2 = H[:, 1]
outer_h1_h2 = np.outer(h1, h2)
symmetrized_h1_h2 = outer_h1_h2 + outer_h1_h2.T
np.fill_diagonal(symmetrized_h1_h2, np.diag(outer_h1_h2))
A[2 * i] = symmetrized_h1_h2[triu_indices]
outer_h1_h1 = np.outer(h1, h1)
symmetrized_h1_h1 = 2 * outer_h1_h1
np.fill_diagonal(symmetrized_h1_h1, np.diag(outer_h1_h1))
outer_h2_h2 = np.outer(h2, h2)
symmetrized_h2_h2 = 2 * outer_h2_h2
np.fill_diagonal(symmetrized_h2_h2, np.diag(outer_h2_h2))
symmetrized_h2_minus_h1 = symmetrized_h2_h2 - symmetrized_h1_h1
A[(2 * i) + 1] = symmetrized_h2_minus_h1[triu_indices]
_, _, Vt = np.linalg.svd(A)
x = Vt[-1]
# assert np.allclose(A @ x, 0)
omega = np.zeros((3, 3), dtype=float)
omega[triu_indices] = x
omega += omega.T
np.fill_diagonal(omega, np.diag(omega / 2))
omega /= omega[-1, -1]
L = np.linalg.cholesky(omega)
K = np.linalg.inv(L.T)
K /= K[-1, -1]
return K
def q2c():
img = np.array(Image.open("data/q2c.jpg"))
rectangles = np.load("data/q2/q2c.npy")
aspect_ratios = np.array([1964/3024, 22.12/31.26, 178.5/247.6])
K = calculate_intrinsics_from_rectangles(rectangles, aspect_ratios)
plt.title("Input Image")
plt.axis("off")
plt.imshow(img)
plt.show()
annotated_rectangle_img = img.copy()
COLORS = [(0, 255, 0), (255, 0, 0), (0, 0, 255)]
for COLOR, rectangle in zip(COLORS, rectangles):
for i, corner in enumerate(rectangle):
next_corner = rectangle[(i + 1) % rectangle.shape[0]]
annotated_rectangle_img = cv2.line(annotated_rectangle_img, corner.astype(int), next_corner.astype(int), COLOR, thickness=10)
annotated_rectangle_img = cv2.putText(annotated_rectangle_img, str(i), corner.astype(int), cv2.FONT_HERSHEY_DUPLEX, 5, COLOR, 5, cv2.LINE_AA)
plt.title("Annotated Rectangles")
plt.axis("off")
plt.imshow(annotated_rectangle_img)
plt.show()
normals = calculate_plane_normals(rectangles, K)
for i in range(len(normals) - 1):
normal_i = normals[i]
for j in range(i + 1, len(normals)):
normal_j = normals[j]
cosine = normal_i @ normal_j
angle_radians = np.arccos(cosine)
angle_degrees = angle_radians * 180 / np.pi
print(f"Angle between Plane {i + 1} and Plane {j + 1} (degrees): {angle_degrees:.2f}")
print(K)
q2c()
Angle between Plane 1 and Plane 2 (degrees): 62.41
Angle between Plane 1 and Plane 3 (degrees): 130.36
Angle between Plane 2 and Plane 3 (degrees): 134.84
[[2437.43  -26.62 2039.82]
 [   0.   2499.63 1612.77]
 [   0.      0.      1.  ]]
5.
The algorithm is essentially the same as in Q2b, but instead of finding the homography from the unit square to each annotated square, we map a rectangle with the correct height:width ratio to the annotated rectangle. This keeps the world-plane coordinates metric up to a single isotropic scale, so both constraints from Q2b remain valid (with the unit square, only the orthogonality constraint would hold for a non-square rectangle). That is really the only difference.
def find_enclosed_pixels(polygon_boundaries: np.ndarray) -> np.ndarray:
min_x = int(polygon_boundaries[:, 0].min())
max_x = int(polygon_boundaries[:, 0].max())
min_y = int(polygon_boundaries[:, 1].min())
max_y = int(polygon_boundaries[:, 1].max())
enclosed_pixels = []
for i in range(min_x, max_x + 1):
for j in range(min_y, max_y + 1):
if cv2.pointPolygonTest(polygon_boundaries, [i, j], True) >= 0:
enclosed_pixels.append([i, j])
return np.array(enclosed_pixels)
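The per-pixel pointPolygonTest loop above is simple but slow on large planes. An equivalent mask-based sketch using cv2.fillPoly (assuming the same integer-pixel, (x, y) ordering; the extra img_shape argument is not in the original function) would be:
def find_enclosed_pixels_mask(polygon_boundaries: np.ndarray, img_shape: tuple) -> np.ndarray:
    # Rasterize the polygon into a binary mask, then read out the filled (x, y) coordinates.
    mask = np.zeros(img_shape[:2], dtype=np.uint8)
    cv2.fillPoly(mask, [polygon_boundaries.astype(np.int32)], 1)
    ys, xs = np.nonzero(mask)
    return np.stack([xs, ys], axis=1)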
def single_view_reconstruction(img: np.ndarray, plane_boundaries: np.ndarray, reference_depth: float) -> tuple[np.ndarray, np.ndarray]:
# Use Q2a to compute `K`
K, _ = calculate_intrinsics_from_vanishing_points(np.stack([plane_boundaries[0], np.roll(plane_boundaries[0], 1, axis=0), plane_boundaries[1]]))
# Annotate plane boundaries with corner points
# Compute plane normals
normals = calculate_plane_normals(plane_boundaries, K)
# cv2 point polygon test
point_cloud = dict()
# Compute rays for each point. Pick one point as reference and set its depth
# Let's just pick the first point and set it's depth to `reference_depth`
reference_pixel = plane_boundaries[0, 0]
reference_ray = np.linalg.inv(K) @ np.append(reference_pixel, 1)
reference_pt = reference_ray * (reference_depth / reference_ray[-1])
reference_pixel_idx = tuple(reference_pixel[::-1].tolist())
point_cloud[reference_pixel_idx] = (reference_pt, img[reference_pixel_idx])
# Assuming that `point_cloud` will have at least one common point with another
for plane, normal in zip(plane_boundaries, normals):
enclosed_pixels = find_enclosed_pixels(plane)
# Compute plane equation given the known 3D point
# n^T\tilde{X} + a = 0
# \tilde{X} = K^{-1} u
reference_pixel = next(iter(filter(lambda x: tuple(x[::-1].tolist()) in point_cloud, enclosed_pixels)))
reference_pt, _ = point_cloud[tuple(reference_pixel[::-1].tolist())]
a = -normal @ reference_pt
for pixel in enclosed_pixels:
pixel_idx = tuple(pixel[::-1].tolist())
if pixel_idx in point_cloud:
continue
d = np.linalg.inv(K) @ np.append(pixel, 1)
s = -a / (normal @ d)
point_cloud[pixel_idx] = (s * d, img[pixel_idx])
# \lambda n1 x1 + \lambda n2 x2 + \lambda n3 + a = 0
# \lambda (n1 x1 + n2 x2 + n3) = -a
# \lambda = -a / normal^T\tildeX
# Compute 3D coordinate of all points on the plane via ray-plane intersection
# Repeat the above two steps to obtain equations for all planes (and all 3D points).
points = np.stack(list(map(operator.itemgetter(0), point_cloud.values())))
colors = np.stack(list(map(operator.itemgetter(1), point_cloud.values())))
return points, colors
def q3(image_name: str):
img = np.array(Image.open(f"data/{image_name}"))
plane_boundaries = np.load(f"data/q3/{os.path.splitext(image_name)[0]}.npy")
points, colors = single_view_reconstruction(img, plane_boundaries, 10.)
plt.title("Input Image")
plt.axis("off")
plt.imshow(img)
plt.show()
annotated_img = img.copy()
COLORS = [(0, 255, 0), (0, 255, 255), (255, 255, 0), (255, 0, 0), (0, 0, 255)]
for i in range(plane_boundaries.shape[0]):
COLOR = COLORS[i]
square = plane_boundaries[i]
for j in range(square.shape[0]):
x, y = square[j]
annotated_img = cv2.circle(annotated_img, (x, y), 3, COLOR, -1)
annotated_img = cv2.putText(annotated_img, str(j+i*4), (x, y), cv2.FONT_HERSHEY_DUPLEX, 1, COLOR, 1, cv2.LINE_AA)
annotated_img = cv2.line(annotated_img, square[j], square[(j+1) % 4], COLOR, 2)
plt.title("Annotations")
plt.axis("off")
plt.imshow(annotated_img)
plt.show()
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.set_axis_off()
ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=colors / 255.)
plt.show()
q3("q3.png")
2.
I use Q2a to calculate $K$. Then, I compute the plane normals with the vanishing points of the perpendicular directions. I pick the first annotated point as the reference point with some reference depth, i.e. I back-project the pixel as $\tilde{X} = \lambda K^{-1} \tilde{u}$ and choose $\lambda$ so that its $z$-coordinate equals the reference depth. Given the plane equation $n^\top \tilde{X} + a = 0$ and the reference point, we can determine $a$ for a given plane. Afterwards, for each pixel on each plane, we can find the depth of its ray $d = K^{-1} \tilde{u}$ from the plane equation, $\lambda = -a / (n^\top d)$, so the 3D point is $\lambda d$. I keep all of these points in a dictionary keyed by pixel so that a point shared between planes refers to the same 3D coordinate, which keeps the reconstruction consistent across planes.
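A small standalone check of the ray-plane intersection described above (made-up $K$, plane, and point; not part of the solution): projecting a known 3D point and intersecting its back-projected ray with the plane recovers the same point.
K_demo = np.array([[1000., 0., 500.],
                   [0., 1000., 400.],
                   [0., 0., 1.]])
n_demo = np.array([0., 0., 1.])            # plane normal
X_demo = np.array([0.5, -0.2, 10.0])       # known 3D point on the plane
a_demo = -n_demo @ X_demo                  # plane offset from n^T X + a = 0
u_demo = K_demo @ X_demo
u_demo /= u_demo[-1]                       # pixel where X_demo projects
d_demo = np.linalg.inv(K_demo) @ u_demo    # back-projected ray direction
lambda_demo = -a_demo / (n_demo @ d_demo)  # depth from the plane equation
assert np.allclose(lambda_demo * d_demo, X_demo)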
(b)
imgs = [f"q3b{i}.jpg" for i in range(1, 4)]
for img in imgs:
q3(img)