%matplotlib inline
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from IPython.core.interactiveshell import InteractiveShell
from PIL import Image
from utils import cosine, my_warp, normalize
np.set_printoptions(suppress=True, precision=4)
InteractiveShell.ast_node_interactivity = "all"
Useful Helper Functions and Global Variables¶
ROOT_DIR = Path().parent
COLORS = None
def plot_lines(lines: np.ndarray, img: np.ndarray) -> np.ndarray:
img_copy = img.copy()
mean_size = np.mean(img.shape[:2])
thickness = int(0.01 * mean_size)
global COLORS
if COLORS is None:
COLORS = []
for i in range(0, len(lines), 4):
try:
color = COLORS[i // 4]
except IndexError:
color = np.random.randint(0, 256, (3,))
COLORS.append(color)
img_copy = cv2.line(img_copy, lines[i].astype(int), lines[i + 1].astype(int), color.tolist(), thickness=thickness)
img_copy = cv2.line(img_copy, lines[i + 2].astype(int), lines[i + 3].astype(int), color.tolist(), thickness=thickness)
return img_copy
def points_to_lines(points: np.ndarray) -> np.ndarray:
assert len(points) % 2 == 0
points_copy = points.copy()
if points.shape[1] == 2:
points_copy = np.append(points_copy, np.ones((len(points), 1)), axis=1)
lines = []
for i in range(0, len(points), 2):
lines.append(normalize(np.cross(points_copy[i], points_copy[i + 1])))
assert np.isclose(lines[-1] @ points_copy[i], 0)
assert np.isclose(lines[-1] @ points_copy[i + 1], 0)
lines = np.array(lines)
return lines
def lines_to_points(lines: np.ndarray) -> np.ndarray:
assert len(lines) % 2 == 0
points = []
for i in range(0, len(lines), 2):
points.append(np.cross(lines[i], lines[i + 1]))
assert np.isclose(lines[i] @ points[-1], 0)
assert np.isclose(lines[i + 1] @ points[-1], 0)
points = np.array(points)
# Homogenizing points
points /= points[:, 2, np.newaxis]
return points
Q1: Affine Rectification¶
def affine_rectification(points: np.ndarray, max_size: float) -> np.ndarray:
# Assuming `points` is of shape (8, 2)
assert points.shape == (8, 2)
normal_points = points / max_size
S = np.diag([1 / max_size, 1 / max_size, 1])
# Find l'_\infty
# Calculate the lines for each pair of points
lines_normalized = points_to_lines(normal_points)
# Calculate the intersection for each pair of lines
intersects_normalized = lines_to_points(lines_normalized)
# Calculate l'_\infty by getting the line between the two intersection points
l_infty_normalized = np.cross(intersects_normalized[0], intersects_normalized[1])
l_infty_normalized /= l_infty_normalized[2]
H = np.eye(3)
H[2] = l_infty_normalized
return np.linalg.inv(S) @ H @ S
def q1(image_name: str):
# Affine rectification
my_annotations_path = ROOT_DIR / "data/annotation/q1_annotation.npy"
annotations = np.load(my_annotations_path, allow_pickle=True).item()
key = os.path.splitext(image_name)[0]
curr_annotation = annotations.get(key)
curr_points = curr_annotation[:8]
test_points = curr_annotation[8:]
img = Image.open(ROOT_DIR / "data/q1" / image_name)
img = np.array(img)
H = affine_rectification(curr_points, max(img.shape[:2]))
affine_rectified_img, H_final = my_warp(img, H)
plt.title("Input Image")
plt.axis("off")
plt.imshow(img)
plt.show()
plt.title("Annotated Parallel Lines on Input Image")
plt.axis("off")
plt.imshow(plot_lines(curr_points, img))
plt.show()
plt.title("Affine-Rectified Image")
plt.axis("off")
plt.imshow(affine_rectified_img)
plt.show()
test_points_prime = np.squeeze(cv2.perspectiveTransform(test_points[:, np.newaxis], H))
orig_lines = points_to_lines(test_points)
transformed_lines = points_to_lines(test_points_prime)
print(f"Cosine of 1st pair of lines: {cosine(*orig_lines[:2]):.4f} -> {cosine(*transformed_lines[:2]):.4f}")
print(f"Cosine of 2nd pair of lines: {cosine(*orig_lines[2:]):.4f} -> {cosine(*transformed_lines[2:]):.4f}")
plt.title("Test Lines on Input Image")
plt.axis("off")
plt.imshow(plot_lines(test_points, img))
plt.show()
transformed_test_points = np.squeeze(cv2.perspectiveTransform(test_points[:, np.newaxis], H_final))
plt.title("Test Lines on Affine-Rectified Image")
plt.axis("off")
plt.imshow(plot_lines(transformed_test_points, affine_rectified_img))
plt.show()
image_names = ["house.webp", "shower.jpg", "checker1.jpg", "tiles5.jpg", "facade.jpg"]
for i, image_name in enumerate(image_names):
q1(image_name)
Cosine of 1st pair of lines: 0.9988 -> 1.0000 Cosine of 2nd pair of lines: 1.0000 -> 1.0000
Cosine of 1st pair of lines: 0.9537 -> 0.9995 Cosine of 2nd pair of lines: 0.9987 -> 0.9999
Cosine of 1st pair of lines: 0.8964 -> 1.0000 Cosine of 2nd pair of lines: 0.9575 -> 1.0000
Cosine of 1st pair of lines: 0.9868 -> 0.9999 Cosine of 2nd pair of lines: 0.9987 -> 0.9999
Cosine of 1st pair of lines: 0.7842 -> 1.0000 Cosine of 2nd pair of lines: 1.0000 -> 1.0000
1.4¶
Given 8 points which denote 2 pairs of parallel lines, my implementation first scales the pixel coordinates down by the max of the height $h$ and width $w$ of the image so that the cross products are more well behaved. In other words, we are multiplying the homogenized pixel coordinates by the matrix $S$ given by
$$S = \begin{pmatrix}\cfrac{1}{\max(h, w)}&0&0\\0&\cfrac{1}{\max(h, w)}&0\\0&0&1\end{pmatrix}.$$
My implementation then converts the pairs of points to lines by using the property that
$$l = x_1 \times x_2.$$
After getting equations for lines, we can then find the points that these parallel lines intersect with the dual property:
$$\tilde{x} = l_1 \times l_2.$$
Finally, we can find the image of the line at infinity by taking the cross product of these intersecting points:
$$l'_\infty = \tilde{x}_1 \times \tilde{x}_2.$$
As mentioned during lecture, the homography $$H$$ is
$$H_{\text{affine}} = \begin{pmatrix}1&0&0\\0&1&0\\l_1&l_2&l_3\end{pmatrix}\qquad\text{given that }\quad l'_\infty = \begin{pmatrix}l_1\\l_2\\l_3\end{pmatrix}.$$
To undo the scaling, we need to left multiply by $S^{-1}$. Thus, the final homography after affine rectification is
$$H = S^{-1} H_{\text{affine}} S.$$
The first 8 annotations for each image were used to find the image of the line at infinity.
Q2: Metric Rectification¶
def metric_rectification(points: np.ndarray, max_size: float, affine_rectified: bool = False) -> np.ndarray:
normal_points = points / max_size
S = np.diag([1 / max_size, 1 / max_size, 1])
# Find $C_\infty^*'$
# Calculate the lines for each pair of points
lines = points_to_lines(normal_points)
# Calculate the constraint coefficients on the entries of $C_\infty^*'$ from $l'^T C_\infty^*' m' = 0$
num_pairs = len(lines) // 2
triu_indices = np.triu_indices(2 if affine_rectified else 3)
A = np.zeros((num_pairs, len(np.transpose(triu_indices))))
for i in range(num_pairs):
l_1 = lines[2 * i]
l_2 = lines[(2 * i) + 1]
outer = np.outer(l_1, l_2)
sym_coefs = (outer + outer.T) / 2
coefs = sym_coefs[triu_indices]
A[i] = coefs
# Solve for $c$
_, Sigma, Vt = np.linalg.svd(A)
c = Vt[-1]
assert np.allclose(A @ c, 0)
# For affine-rectified images, d = e = f = 0
C_star_prime_infty = np.zeros((3, 3))
C_star_prime_infty[triu_indices] = c
C_star_prime_infty = (C_star_prime_infty + C_star_prime_infty.T) / 2
# Compute $H$ such that $C_\infty^* = H C_\infty^*' H^T$ using SVD
U, Sigma, _ = np.linalg.svd(C_star_prime_infty)
Sigma_sqrt_reciprocal = Sigma.copy()
Sigma_sqrt_reciprocal[2] = 1
Sigma_sqrt_reciprocal = Sigma_sqrt_reciprocal ** -0.5
H = np.diag(Sigma_sqrt_reciprocal) @ U.T
# assert np.allclose(H @ C_star_prime_infty @ H.T, np.diag([1, 1, 0]))
return np.linalg.inv(S) @ H @ S
def q2(image_name: str):
# Metric rectification
my_annotations_path = ROOT_DIR / "data/annotation/q2_annotation.npy"
perp_annotations = np.load(my_annotations_path, allow_pickle=True).item()
par_annotation_path = my_annotations_path.parent / my_annotations_path.name.replace("2", "1")
par_annotations = np.load(par_annotation_path, allow_pickle=True).item()
key = os.path.splitext(image_name)[0]
par_curr_annotation = par_annotations.get(key)
perp_curr_annotation = perp_annotations.get(key)
par_curr_points = par_curr_annotation[:8]
perp_curr_points = perp_curr_annotation[:8]
perp_test_points = perp_curr_annotation[8:]
img = Image.open(ROOT_DIR / "data/q1" / image_name)
img = np.array(img)
max_size = max(img.shape[:2])
H_affine = affine_rectification(par_curr_points, max_size)
perp_affine_rectified_points = np.squeeze(cv2.perspectiveTransform(perp_curr_points[:, np.newaxis], H_affine))
H_metric = metric_rectification(perp_affine_rectified_points, max_size, affine_rectified=True)
H = H_metric @ H_affine
affine_rectified_img, H_affine = my_warp(img, H_affine)
affine_rectified_curr_points = np.squeeze(cv2.perspectiveTransform(perp_curr_points[:, np.newaxis], H_affine))
metric_affine_rectified_img, H_final = my_warp(img, H)
plt.title("Input Image")
plt.axis("off")
plt.imshow(img)
plt.show()
plt.title("Annotated Perpendicular Lines on Input Image")
plt.axis("off")
plt.imshow(plot_lines(perp_curr_points, img))
plt.show()
plt.title("Annotated Perpendicular Lines on Affine-Rectified Image")
plt.axis("off")
plt.imshow(plot_lines(affine_rectified_curr_points, affine_rectified_img))
plt.show()
plt.title("Rectified Image")
plt.axis("off")
plt.imshow(metric_affine_rectified_img)
plt.show()
test_points_prime = np.squeeze(cv2.perspectiveTransform(perp_test_points[:, np.newaxis], H))
orig_lines = points_to_lines(perp_test_points)
transformed_lines = points_to_lines(test_points_prime)
print(f"Cosine of 1st pair of lines: {cosine(*orig_lines[:2]):.4f} -> {cosine(*transformed_lines[:2]):.4f}")
print(f"Cosine of 2nd pair of lines: {cosine(*orig_lines[2:]):.4f} -> {cosine(*transformed_lines[2:]):.4f}")
plt.title("Test Lines on Input Image")
plt.axis("off")
plt.imshow(plot_lines(perp_test_points, img))
plt.show()
metric_rectified_test_points = np.squeeze(cv2.perspectiveTransform(perp_test_points[:, np.newaxis], H_final))
plt.title("Test Lines on Metric-Rectified Image")
plt.axis("off")
plt.imshow(plot_lines(metric_rectified_test_points, metric_affine_rectified_img))
plt.show()
image_names = ["house.webp", "shower.jpg", "checker1.jpg", "tiles5.jpg", "facade.jpg"]
for i, image_name in enumerate(image_names):
q2(image_name)
Cosine of 1st pair of lines: -0.0509 -> -0.0828 Cosine of 2nd pair of lines: -0.1608 -> -0.0315
Cosine of 1st pair of lines: 0.1924 -> -0.0049 Cosine of 2nd pair of lines: -0.1989 -> 0.0085
Cosine of 1st pair of lines: -0.2527 -> 0.0018 Cosine of 2nd pair of lines: 0.0882 -> 0.0082
Cosine of 1st pair of lines: 0.1677 -> -0.0081 Cosine of 2nd pair of lines: 0.0350 -> -0.0293
Cosine of 1st pair of lines: -0.1919 -> 0.0461 Cosine of 2nd pair of lines: -0.1065 -> 0.0107
2.4¶
Given 8 points which denote 2 pairs of perpendicular lines, my implementation first scales the pixel coordinates down by the max of the height $h$ and width $w$ of the image so that the cross products are more well behaved. In other words, we are multiplying the homogenized pixel coordinates by the matrix $S$ given by
$$S = \begin{pmatrix}\cfrac{1}{\max(h, w)}&0&0\\0&\cfrac{1}{\max(h, w)}&0\\0&0&1\end{pmatrix}.$$
My implementation then converts the pairs of points to lines by using the property that
$$l = x_1 \times x_2.$$
After getting equations for lines, we can then find the image of the dual conic $C_\infty^{*'}$ given the constraint that for images of perpendicular lines $l'$ and $m'$,
$$l'^T C_\infty^{*'} m' = 0.$$
In particular, we know that $C_\infty^{*'}$ will be in the form
$$C_\infty^{*'} = \begin{pmatrix}a&\cfrac{b}{2}&\cfrac{d}{2}\\\cfrac{b}{2}&c&\cfrac{e}{2}\\\cfrac{d}{2}&\cfrac{e}{2}&f\end{pmatrix}.$$
Since we know that the input image is affine-rectified, then we know that $d = e = f = 0$, thereby simplifying the matrix to be of the form
$$C_\infty^{*'} = \begin{pmatrix}a&\cfrac{b}{2}&0\\\cfrac{b}{2}&c&0\\0&0&0\end{pmatrix}.$$
If $l' = \begin{pmatrix}l_1\\l_2\\l_3\end{pmatrix}$ and $m' = \begin{pmatrix}m_1\\m_2\\m_3\end{pmatrix}$, then we get
$$l_1\cdot m_1\cdot a + \cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2} \cdot b + l_2\cdot m_2\cdot c = 0.$$
Let $\mathbf{c} = \begin{pmatrix}a\\b\\c\end{pmatrix}$, then for every pair of perpendicular lines, we get a constraint on $\mathbf{c}$:
$$\begin{pmatrix}l_1\cdot m_1&\cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2}&l_2\cdot m_2\end{pmatrix}\mathbf{c} = 0.$$
We can notice that each one of these coefficients is just the element-wise average of the outer product of the first two entries of $l'$ and $m'$ and its transpose, which is what I used in my implementation. If we stack these constraints row-wise into a matrix $A$, we have an optimization problem. In particular,
$$\min_{||\mathbf{c}||=1} ||A\mathbf{c}||.$$
If we take the SVD decomposition of $A$, then $\mathbf{c}$ is just the last right singular vector (i.e. last row of $V^T$) because it is the unit vector that minimizes this equation. Now that we have $\mathbf{c}$, we can reconstruct $C_\infty^{*'}$. Also, since we know that $C_\infty^* = H C_\infty^{*'} H^T$, we can find $H$ by performing the SVD decomposition on $C_\infty^{*'}$ and get that
$$H_{\text{metric}} = \begin{pmatrix}\sqrt{\sigma_1^{-1}}&0&0\\0&\sqrt{\sigma_2^{-1}}&0\\0&0&1\end{pmatrix}U^T.$$
To undo the scaling, we need to left multiply by $S^{-1}$. Thus, the final homography after affine rectification is
$$H = S^{-1} H_{\text{metric}} S.$$
The first 8 annotations for each image were used to find the image of dual conic of the circular points.
Q3: Planar Homography from Point Correspondences¶
def calculate_homography_from_correspondences(src: np.ndarray, tgt: np.ndarray) -> np.ndarray:
## Normalization of `src`
t = src.mean(axis=0, keepdims=True)
r_bar = np.linalg.norm(src - t, axis=0).mean()
s = (2 ** 0.5) / r_bar
T = np.diag([s, s, 1])
T[:2, 2] = -t
## Normalization of `tgt`
t_prime = tgt.mean(axis=0, keepdims=True)
r_prime_bar = np.linalg.norm(tgt - t_prime, axis=0).mean()
s_prime = (2 ** 0.5) / r_prime_bar
T_prime = np.diag([s_prime, s_prime, 1])
T_prime[:2, 2] = -t_prime
## DLT
src_homogenized = np.append(src, np.ones((len(src), 1)), axis=1)
src_prime = src_homogenized @ T.T
src_prime_homogenized = src_prime / src_prime[:, 2, np.newaxis]
tgt_homogenized = np.append(tgt, np.ones((len(tgt), 1)), axis=1)
tgt_prime = tgt_homogenized @ T_prime.T
tgt_prime_homogenized = tgt_prime / tgt_prime[:, 2, np.newaxis]
# Create the constraints
A = np.zeros((2 * len(src), 9))
for i, (src_prime_i, tgt_prime_i) in enumerate(zip(src_prime_homogenized, tgt_prime_homogenized)):
A[2 * i, 3:6] = -tgt_prime_i[2] * src_prime_i
A[2 * i, 6:9] = tgt_prime_i[1] * src_prime_i
A[(2 * i) + 1, 0:3] = tgt_prime_i[2] * src_prime_i
A[(2 * i) + 1, 6:9] = -tgt_prime_i[0] * src_prime_i
# Get h
_, _, Vt = np.linalg.svd(A)
h = Vt[-1]
H_tilde = h.reshape(3, 3)
## Denormalization
H = np.linalg.inv(T_prime) @ H_tilde @ T
return H
Useful Helper Functions¶
def compose_images_from_correspondences(src_img: np.ndarray, tgt_img: np.ndarray, tgt_points: np.ndarray, clockwise: bool = True) -> np.ndarray:
output_img = tgt_img.copy()
h, w = src_img.shape[:2]
src_points = np.array([[0, 0],
[w - 1, 0],
[w - 1, h - 1],
[0, h - 1]]).astype(float)
if not clockwise:
src_points[[1, 3]] = src_points[[3, 1]]
H = calculate_homography_from_correspondences(src_points, tgt_points)
warped_src_img = cv2.warpPerspective(src_img, H, tgt_img.shape[1::-1])
img_mask = np.ones((h, w, 3))
warped_src_img_mask = cv2.warpPerspective(img_mask, H, tgt_img.shape[1::-1]).astype(bool)
output_img[warped_src_img_mask] = 0
output_img += warped_src_img
return output_img
def annotate_corners(points: np.ndarray, img: np.ndarray) -> np.ndarray:
img_copy = img.copy()
mean_size = np.mean(img.shape[:2])
thickness = int(0.01 * mean_size)
global COLORS
color = COLORS[0].tolist()
for i, point in enumerate(points, start=1):
img_copy = cv2.circle(img_copy, point.astype(int), radius=0, color=color, thickness=thickness * 3)
img_copy = cv2.putText(img_copy, str(i), point.astype(int), cv2.FONT_HERSHEY_SIMPLEX, 2, color=color, thickness=thickness, lineType=cv2.LINE_AA)
return img_copy
def q3(src_img_name: str, tgt_img_name: str):
# Planar Homography from Point Correspondences
my_annotations_path = ROOT_DIR / "data/annotation/q3_annotation.npy"
annotations = np.load(my_annotations_path, allow_pickle=True).item()
src_img = Image.open(ROOT_DIR / "data/q3" / src_img_name)
src_img = np.array(src_img)
tgt_img = Image.open(ROOT_DIR / "data/q3" / tgt_img_name)
tgt_img = np.array(tgt_img)
key = os.path.splitext(tgt_img_name)[0]
tgt_points = annotations.get(key)
composed_img = compose_images_from_correspondences(src_img, tgt_img, tgt_points)
plt.title("Normal Image")
plt.axis("off")
plt.imshow(src_img)
plt.show()
plt.title("Perspective Image")
plt.axis("off")
plt.imshow(tgt_img)
plt.show()
plt.title("Annotated Corners in Perspective Image")
plt.axis("off")
plt.imshow(annotate_corners(tgt_points, tgt_img))
plt.show()
plt.title("Warped and Overlaid Image")
plt.axis("off")
plt.imshow(composed_img)
plt.show()
image_name_tuples = [("trains.jpg", "journal.jpg"), ("desk-normal.png", "desk-perspective.png")]
for image_name_tuple in image_name_tuples:
q3(*image_name_tuple)
3.3¶
Essentially, I just followed the pseudocode given in class. To explain further, my implementation first normalizes the source points so that they are $\sqrt 2$ away from the origin on average and calculates the overall similarity transformation $T$. Afterwards, my implementation then does a similar normalization for the corresponding points in the target image and calculate the similarity transformation $T'$. We know that given a homography $H$, we ideally want $H\mathbf{x} \equiv \mathbf{x}'$, or in other words that $H\mathbf{x} \times \mathbf{x}' = \mathbf{0}$. From some clever identities shown during lecture, we know that for a given pair of corresponding points, we get two constraints on the flattened vector of $H$'s elements $\mathbf{h}$:
$$\begin{pmatrix}\mathbf{0}&-w'\mathbf{x}^T&y'\mathbf{x}^T\\w'\mathbf{x}^T&\mathbf{0}&-x'\mathbf{x}^T\end{pmatrix}\mathbf{h} = \mathbf{0} \qquad \text{given that } \mathbf{x}' = \begin{pmatrix}x'\\y'\\w'\end{pmatrix}.$$
After performing the similarity transformations of $T$ to the source points and $T'$ to the target points and homogenizing them, we can perform the Direct Linear Transformation (DLT) algorithm and get $\mathbf{h}$. From here, we can reshape $\mathbf{h}$ to get $\tilde{H}$ and perform the denormalization step of $(T')^{-1}$ to get the final homography:
$$H = (T')^{-1} \tilde{H} T.$$
Even though the instructions the corresponding points are annotated in anti-clockwise order, they are actually labeled in clockwise order. I added an additional parameter to compose_images_from_correspondences
so that the user can specify whichever annotation order.
Q4: Bonus: Metric Rectification from Perpendicular Lines¶
def q4(image_name: str):
my_annotations_path = ROOT_DIR / "data/annotation/q4_annotation.npy"
perp_annotations = np.load(my_annotations_path, allow_pickle=True).item()
key = os.path.splitext(image_name)[0]
perp_curr_annotation = perp_annotations.get(key)
perp_curr_points = perp_curr_annotation[:20]
perp_test_points = perp_curr_annotation[20:]
img = Image.open(ROOT_DIR / "data/q1" / image_name)
img = np.array(img)
max_size = max(img.shape[:2])
H = metric_rectification(perp_curr_points, max_size)
metric_rectified_img, H_final = my_warp(img, H)
metric_rectified_curr_points = np.squeeze(cv2.perspectiveTransform(perp_curr_points[:, np.newaxis], H_final))
plt.title("Input Image")
plt.axis("off")
plt.imshow(img)
plt.show()
plt.title("Annotated Perpendicular Lines")
plt.axis("off")
plt.imshow(plot_lines(perp_curr_points, img))
plt.show()
plt.title("Rectified Image")
plt.axis("off")
plt.imshow(plot_lines(metric_rectified_curr_points, metric_rectified_img))
plt.show()
test_points_prime = np.squeeze(cv2.perspectiveTransform(perp_test_points[:, np.newaxis], H))
orig_lines = points_to_lines(perp_test_points)
transformed_lines = points_to_lines(test_points_prime)
print(f"Cosine of 1st pair of lines: {cosine(*orig_lines[:2]):.4f} -> {cosine(*transformed_lines[:2]):.4f}")
print(f"Cosine of 2nd pair of lines: {cosine(*orig_lines[2:4]):.4f} -> {cosine(*transformed_lines[2:4]):.4f}")
print(f"Cosine of 3rd pair of lines: {cosine(*orig_lines[4:]):.4f} -> {cosine(*transformed_lines[4:]):.4f}")
plt.title("Test Lines on Input Image")
plt.axis("off")
plt.imshow(plot_lines(perp_test_points, img))
plt.show()
metric_rectified_test_points = np.squeeze(cv2.perspectiveTransform(perp_test_points[:, np.newaxis], H_final))
plt.title("Test Lines on Metric-Rectified Image")
plt.axis("off")
plt.imshow(plot_lines(metric_rectified_test_points, metric_rectified_img))
plt.show()
image_names = ["papers.jpg", "checker1.jpg"]
for i, image_name in enumerate(image_names):
q4(image_name)
Cosine of 1st pair of lines: -0.3933 -> -0.0998 Cosine of 2nd pair of lines: -0.6658 -> -0.0887 Cosine of 3rd pair of lines: -0.5957 -> -0.0441
Cosine of 1st pair of lines: -0.6192 -> -0.1122 Cosine of 2nd pair of lines: 0.4841 -> 0.0304 Cosine of 3rd pair of lines: -0.4757 -> -0.0718
4.4¶
Much of my implementation is the same as that for Q2, just that we know that the image of the dual conic doesn't necessarily have that $d = e = f = 0$. So
$$C_\infty^{*'} = \begin{pmatrix}a&\cfrac{b}{2}&\cfrac{d}{2}\\\cfrac{b}{2}&c&\cfrac{e}{2}\\\cfrac{d}{2}&\cfrac{e}{2}&f\end{pmatrix}.$$
If $l' = \begin{pmatrix}l_1\\l_2\\l_3\end{pmatrix}$ and $m' = \begin{pmatrix}m_1\\m_2\\m_3\end{pmatrix}$, then we get
$$l_1\cdot m_1\cdot a + \cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2} \cdot b + l_2\cdot m_2\cdot c + \cfrac{l_1\cdot m_3 + l_3\cdot m_1}{2}\cdot d + \cfrac{l_2\cdot m_3 + l_3\cdot m_2}{2}\cdot e + l_3\cdot m_3\cdot f= 0.$$
Let $\mathbf{c} = \begin{pmatrix}a\\b\\c\\d\\e\\f\end{pmatrix}$, then for every pair of perpendicular lines, we get a constraint on $\mathbf{c}$:
$$\begin{pmatrix}l_1\cdot m_1&\cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2}&l_2\cdot m_2&\cfrac{l_1\cdot m_3 + l_3\cdot m_1}{2}&\cfrac{l_2\cdot m_3 + l_3\cdot m_2}{2}&l_3\cdot m_3\end{pmatrix}\mathbf{c} = 0.$$
We can notice that each one of these coefficients is just the element-wise average of the outer product of $l'$ and $m'$ and its transpose, which is what I used in my implementation. One practical difference though is that it is difficult to index into the upper triangle of the outer product of $l'$ and $m'$ in this order. So instead I let $\mathbf{c} = \begin{pmatrix}a\\b\\d\\c\\e\\f\end{pmatrix}$. So the constraints are instead
$$\begin{pmatrix}l_1\cdot m_1&\cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2}&\cfrac{l_1\cdot m_3 + l_3\cdot m_1}{2}&l_2\cdot m_2&\cfrac{l_2\cdot m_3 + l_3\cdot m_2}{2}&l_3\cdot m_3\end{pmatrix}\mathbf{c} = 0.$$
If we stack these constraints row-wise into a matrix $A$, we have an optimization problem. In particular,
$$\min_{||\mathbf{c}||=1} ||A\mathbf{c}||.$$
If we take the SVD decomposition of $A$, then $\mathbf{c}$ is just the last right singular vector (i.e. last row of $V^T$) because it is the unit vector that minimizes this equation. Now that we have $\mathbf{c}$, we can reconstruct $C_\infty^{*'}$. Also, since we know that $C_\infty^* = H C_\infty^{*'} H^T$, we can find $H$ by performing the SVD decomposition on $C_\infty^{*'}$ and get that
$$H_{\text{metric}} = \begin{pmatrix}\sqrt{\sigma_1^{-1}}&0&0\\0&\sqrt{\sigma_2^{-1}}&0\\0&0&1\end{pmatrix}U^T.$$
To undo the scaling, we need to left multiply by $S^{-1}$. Thus, the final homography after affine rectification is
$$H = S^{-1} H_{\text{metric}} S.$$
The first 20 annotations for each image were used to find the image of dual conic of the circular points.
Q5: Bonus: More Planar Homography from Point Correspondences¶
def q5():
my_annotations_path = ROOT_DIR / "data/annotation/q5_annotation.npy"
annotations = np.load(my_annotations_path, allow_pickle=True).item()
src_img_names = ["trains.jpg", "bongo.jpg", "nyan.png"]
tgt_img_name = "times_square.jpg"
tgt_img = Image.open(ROOT_DIR / "data/q3" / tgt_img_name)
tgt_img = np.array(tgt_img)
plt.title("Target Image")
plt.axis("off")
plt.imshow(tgt_img)
plt.show()
key = os.path.splitext(tgt_img_name)[0]
all_tgt_points = annotations.get(key)
composed_img = tgt_img.copy()
fig, axs = plt.subplots(1, 3)
fig.suptitle("Input Images")
for i, src_img_name in enumerate(src_img_names):
src_img = Image.open(ROOT_DIR / "data/q3" / src_img_name)
src_img = np.array(src_img)
axs[i].axis("off")
axs[i].imshow(src_img)
tgt_points = all_tgt_points[4 * i:4 * (i + 1)]
composed_img = compose_images_from_correspondences(src_img, composed_img, tgt_points)
plt.show()
plt.title("Composed Image From Multiple Point Correspondences")
plt.axis("off")
plt.imshow(composed_img)
plt.show()
q5()
5.3¶
For this question, I essentially just used compose_images_from_correspondences
that I wrote for Q3 and used that to add the corresponding images to the target image. I annotated the billboards in a clockwise manner.
Annotation Code¶
For completeness, the script that I used to annotate the images is annotate.py
.