HW1: Projective Geometry and Homography¶

Setup¶

In [1]:
%matplotlib inline
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np

from pathlib import Path

from IPython.core.interactiveshell import InteractiveShell
from PIL import Image

from utils import cosine, my_warp, normalize

np.set_printoptions(suppress=True, precision=4)
InteractiveShell.ast_node_interactivity = "all"

Useful Helper Functions and Global Variables¶

In [2]:
ROOT_DIR = Path().parent

COLORS = None


def plot_lines(lines: np.ndarray, img: np.ndarray) -> np.ndarray:
  img_copy = img.copy()
  mean_size = np.mean(img.shape[:2])
  thickness = int(0.01 * mean_size)
  
  global COLORS
  
  if COLORS is None:
    COLORS = []

  for i in range(0, len(lines), 4):
    try:
      color = COLORS[i // 4]
    except IndexError:
      color = np.random.randint(0, 256, (3,))
      COLORS.append(color)
      
    img_copy = cv2.line(img_copy, lines[i].astype(int), lines[i + 1].astype(int), color.tolist(), thickness=thickness)
    img_copy = cv2.line(img_copy, lines[i + 2].astype(int), lines[i + 3].astype(int), color.tolist(), thickness=thickness)
    
  return img_copy


def points_to_lines(points: np.ndarray) -> np.ndarray:
  assert len(points) % 2 == 0
  
  points_copy = points.copy()
  
  if points.shape[1] == 2:
    points_copy = np.append(points_copy, np.ones((len(points), 1)), axis=1)
  
  lines = []
  
  for i in range(0, len(points), 2):
    lines.append(normalize(np.cross(points_copy[i], points_copy[i + 1])))
    
    assert np.isclose(lines[-1] @ points_copy[i], 0)
    assert np.isclose(lines[-1] @ points_copy[i + 1], 0)
    
  lines = np.array(lines)

  return lines


def lines_to_points(lines: np.ndarray) -> np.ndarray:
  assert len(lines) % 2 == 0
  
  points = []

  for i in range(0, len(lines), 2):
    points.append(np.cross(lines[i], lines[i + 1]))

    assert np.isclose(lines[i] @ points[-1], 0)
    assert np.isclose(lines[i + 1] @ points[-1], 0)
    
  points = np.array(points)
  # Homogenizing points
  points /= points[:, 2, np.newaxis]
  
  return points

Q1: Affine Rectification¶

In [3]:
def affine_rectification(points: np.ndarray, max_size: float) -> np.ndarray:
  # Assuming `points` is of shape (8, 2)
  assert points.shape == (8, 2)
  
  normal_points = points / max_size
  
  S = np.diag([1 / max_size, 1 / max_size, 1])
  
  # Find l'_\infty
  #   Calculate the lines for each pair of points
  lines_normalized = points_to_lines(normal_points)
  #   Calculate the intersection for each pair of lines
  intersects_normalized = lines_to_points(lines_normalized)
  #   Calculate l'_\infty by getting the line between the two intersection points
  l_infty_normalized = np.cross(intersects_normalized[0], intersects_normalized[1])
  l_infty_normalized /= l_infty_normalized[2]
  
  H = np.eye(3)
  H[2] = l_infty_normalized
  
  return np.linalg.inv(S) @ H @ S
In [4]:
def q1(image_name: str):
  # Affine rectification
  my_annotations_path = ROOT_DIR / "data/annotation/q1_annotation.npy"
  annotations = np.load(my_annotations_path, allow_pickle=True).item()
  
  key = os.path.splitext(image_name)[0]
  curr_annotation = annotations.get(key)
  curr_points = curr_annotation[:8]
  test_points = curr_annotation[8:]
  
  img = Image.open(ROOT_DIR / "data/q1" / image_name)
  img = np.array(img)
  
  H = affine_rectification(curr_points, max(img.shape[:2]))
  
  affine_rectified_img, H_final = my_warp(img, H)
  
  plt.title("Input Image")
  plt.axis("off")
  plt.imshow(img)
  plt.show()
  
  plt.title("Annotated Parallel Lines on Input Image")
  plt.axis("off")
  plt.imshow(plot_lines(curr_points, img))
  plt.show()
  
  plt.title("Affine-Rectified Image")
  plt.axis("off")
  plt.imshow(affine_rectified_img)
  plt.show()
  
  test_points_prime = np.squeeze(cv2.perspectiveTransform(test_points[:, np.newaxis], H))
  orig_lines = points_to_lines(test_points)
  transformed_lines = points_to_lines(test_points_prime)
  
  print(f"Cosine of 1st pair of lines: {cosine(*orig_lines[:2]):.4f} -> {cosine(*transformed_lines[:2]):.4f}")
  print(f"Cosine of 2nd pair of lines: {cosine(*orig_lines[2:]):.4f} -> {cosine(*transformed_lines[2:]):.4f}")

  plt.title("Test Lines on Input Image")
  plt.axis("off")
  plt.imshow(plot_lines(test_points, img))
  plt.show()
  
  transformed_test_points = np.squeeze(cv2.perspectiveTransform(test_points[:, np.newaxis], H_final))
  
  plt.title("Test Lines on Affine-Rectified Image")
  plt.axis("off")
  plt.imshow(plot_lines(transformed_test_points, affine_rectified_img))
  plt.show()
In [5]:
image_names = ["house.webp", "shower.jpg", "checker1.jpg", "tiles5.jpg", "facade.jpg"]

for i, image_name in enumerate(image_names):
  q1(image_name)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: 0.9988 -> 1.0000
Cosine of 2nd pair of lines: 1.0000 -> 1.0000
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: 0.9537 -> 0.9995
Cosine of 2nd pair of lines: 0.9987 -> 0.9999
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: 0.8964 -> 1.0000
Cosine of 2nd pair of lines: 0.9575 -> 1.0000
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: 0.9868 -> 0.9999
Cosine of 2nd pair of lines: 0.9987 -> 0.9999
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: 0.7842 -> 1.0000
Cosine of 2nd pair of lines: 1.0000 -> 1.0000
No description has been provided for this image
No description has been provided for this image

1.4¶

Given 8 points which denote 2 pairs of parallel lines, my implementation first scales the pixel coordinates down by the max of the height $h$ and width $w$ of the image so that the cross products are more well behaved. In other words, we are multiplying the homogenized pixel coordinates by the matrix $S$ given by

$$S = \begin{pmatrix}\cfrac{1}{\max(h, w)}&0&0\\0&\cfrac{1}{\max(h, w)}&0\\0&0&1\end{pmatrix}.$$

My implementation then converts the pairs of points to lines by using the property that

$$l = x_1 \times x_2.$$

After getting equations for lines, we can then find the points that these parallel lines intersect with the dual property:

$$\tilde{x} = l_1 \times l_2.$$

Finally, we can find the image of the line at infinity by taking the cross product of these intersecting points:

$$l'_\infty = \tilde{x}_1 \times \tilde{x}_2.$$

As mentioned during lecture, the homography $$H$$ is

$$H_{\text{affine}} = \begin{pmatrix}1&0&0\\0&1&0\\l_1&l_2&l_3\end{pmatrix}\qquad\text{given that }\quad l'_\infty = \begin{pmatrix}l_1\\l_2\\l_3\end{pmatrix}.$$

To undo the scaling, we need to left multiply by $S^{-1}$. Thus, the final homography after affine rectification is

$$H = S^{-1} H_{\text{affine}} S.$$

The first 8 annotations for each image were used to find the image of the line at infinity.

Q2: Metric Rectification¶

In [6]:
def metric_rectification(points: np.ndarray, max_size: float, affine_rectified: bool = False) -> np.ndarray:    
  normal_points = points / max_size
  
  S = np.diag([1 / max_size, 1 / max_size, 1])
  
  # Find $C_\infty^*'$
  #   Calculate the lines for each pair of points
  lines = points_to_lines(normal_points)
  #   Calculate the constraint coefficients on the entries of $C_\infty^*'$ from $l'^T C_\infty^*' m' = 0$
  num_pairs = len(lines) // 2
  triu_indices = np.triu_indices(2 if affine_rectified else 3)
  A = np.zeros((num_pairs, len(np.transpose(triu_indices))))

  for i in range(num_pairs):
    l_1 = lines[2 * i]
    l_2 = lines[(2 * i) + 1]
    outer = np.outer(l_1, l_2)
    sym_coefs = (outer + outer.T) / 2
    coefs = sym_coefs[triu_indices]
    A[i] = coefs
  
  #   Solve for $c$
  _, Sigma, Vt = np.linalg.svd(A)
  c = Vt[-1]
  
  assert np.allclose(A @ c, 0)
  
  # For affine-rectified images, d = e = f = 0
  C_star_prime_infty = np.zeros((3, 3))
  C_star_prime_infty[triu_indices] = c
  C_star_prime_infty = (C_star_prime_infty + C_star_prime_infty.T) / 2
  
  #   Compute $H$ such that $C_\infty^* = H C_\infty^*' H^T$ using SVD
  U, Sigma, _ = np.linalg.svd(C_star_prime_infty)
  Sigma_sqrt_reciprocal = Sigma.copy()
  Sigma_sqrt_reciprocal[2] = 1
  Sigma_sqrt_reciprocal = Sigma_sqrt_reciprocal ** -0.5
  H = np.diag(Sigma_sqrt_reciprocal) @ U.T
  
  # assert np.allclose(H @ C_star_prime_infty @ H.T, np.diag([1, 1, 0]))
  
  return np.linalg.inv(S) @ H @ S
In [7]:
def q2(image_name: str):
  # Metric rectification
  my_annotations_path = ROOT_DIR / "data/annotation/q2_annotation.npy"
  perp_annotations = np.load(my_annotations_path, allow_pickle=True).item()
    
  par_annotation_path = my_annotations_path.parent / my_annotations_path.name.replace("2", "1")
  par_annotations = np.load(par_annotation_path, allow_pickle=True).item()
    
  key = os.path.splitext(image_name)[0]
  par_curr_annotation = par_annotations.get(key)
  perp_curr_annotation = perp_annotations.get(key)
  par_curr_points = par_curr_annotation[:8]
  perp_curr_points = perp_curr_annotation[:8]
  perp_test_points = perp_curr_annotation[8:]
  
  img = Image.open(ROOT_DIR / "data/q1" / image_name)
  img = np.array(img)
  
  max_size = max(img.shape[:2])
  
  H_affine = affine_rectification(par_curr_points, max_size)
  
  perp_affine_rectified_points = np.squeeze(cv2.perspectiveTransform(perp_curr_points[:, np.newaxis], H_affine))
  
  H_metric = metric_rectification(perp_affine_rectified_points, max_size, affine_rectified=True)
  
  H = H_metric @ H_affine
  
  affine_rectified_img, H_affine = my_warp(img, H_affine)
  affine_rectified_curr_points = np.squeeze(cv2.perspectiveTransform(perp_curr_points[:, np.newaxis], H_affine))
  
  metric_affine_rectified_img, H_final = my_warp(img, H)
  
  plt.title("Input Image")
  plt.axis("off")
  plt.imshow(img)
  plt.show()
  
  plt.title("Annotated Perpendicular Lines on Input Image")
  plt.axis("off")
  plt.imshow(plot_lines(perp_curr_points, img))
  plt.show()
  
  plt.title("Annotated Perpendicular Lines on Affine-Rectified Image")
  plt.axis("off")
  plt.imshow(plot_lines(affine_rectified_curr_points, affine_rectified_img))
  plt.show()
  
  plt.title("Rectified Image")
  plt.axis("off")
  plt.imshow(metric_affine_rectified_img)
  plt.show()
  
  test_points_prime = np.squeeze(cv2.perspectiveTransform(perp_test_points[:, np.newaxis], H))
  orig_lines = points_to_lines(perp_test_points)
  transformed_lines = points_to_lines(test_points_prime)
  
  print(f"Cosine of 1st pair of lines: {cosine(*orig_lines[:2]):.4f} -> {cosine(*transformed_lines[:2]):.4f}")
  print(f"Cosine of 2nd pair of lines: {cosine(*orig_lines[2:]):.4f} -> {cosine(*transformed_lines[2:]):.4f}")
  
  plt.title("Test Lines on Input Image")
  plt.axis("off")
  plt.imshow(plot_lines(perp_test_points, img))
  plt.show()
  
  metric_rectified_test_points = np.squeeze(cv2.perspectiveTransform(perp_test_points[:, np.newaxis], H_final))
  
  plt.title("Test Lines on Metric-Rectified Image")
  plt.axis("off")
  plt.imshow(plot_lines(metric_rectified_test_points, metric_affine_rectified_img))
  plt.show()
In [8]:
image_names = ["house.webp", "shower.jpg", "checker1.jpg", "tiles5.jpg", "facade.jpg"]

for i, image_name in enumerate(image_names):
  q2(image_name)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: -0.0509 -> -0.0828
Cosine of 2nd pair of lines: -0.1608 -> -0.0315
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: 0.1924 -> -0.0049
Cosine of 2nd pair of lines: -0.1989 -> 0.0085
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: -0.2527 -> 0.0018
Cosine of 2nd pair of lines: 0.0882 -> 0.0082
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: 0.1677 -> -0.0081
Cosine of 2nd pair of lines: 0.0350 -> -0.0293
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: -0.1919 -> 0.0461
Cosine of 2nd pair of lines: -0.1065 -> 0.0107
No description has been provided for this image
No description has been provided for this image

2.4¶

Given 8 points which denote 2 pairs of perpendicular lines, my implementation first scales the pixel coordinates down by the max of the height $h$ and width $w$ of the image so that the cross products are more well behaved. In other words, we are multiplying the homogenized pixel coordinates by the matrix $S$ given by

$$S = \begin{pmatrix}\cfrac{1}{\max(h, w)}&0&0\\0&\cfrac{1}{\max(h, w)}&0\\0&0&1\end{pmatrix}.$$

My implementation then converts the pairs of points to lines by using the property that

$$l = x_1 \times x_2.$$

After getting equations for lines, we can then find the image of the dual conic $C_\infty^{*'}$ given the constraint that for images of perpendicular lines $l'$ and $m'$,

$$l'^T C_\infty^{*'} m' = 0.$$

In particular, we know that $C_\infty^{*'}$ will be in the form

$$C_\infty^{*'} = \begin{pmatrix}a&\cfrac{b}{2}&\cfrac{d}{2}\\\cfrac{b}{2}&c&\cfrac{e}{2}\\\cfrac{d}{2}&\cfrac{e}{2}&f\end{pmatrix}.$$

Since we know that the input image is affine-rectified, then we know that $d = e = f = 0$, thereby simplifying the matrix to be of the form

$$C_\infty^{*'} = \begin{pmatrix}a&\cfrac{b}{2}&0\\\cfrac{b}{2}&c&0\\0&0&0\end{pmatrix}.$$

If $l' = \begin{pmatrix}l_1\\l_2\\l_3\end{pmatrix}$ and $m' = \begin{pmatrix}m_1\\m_2\\m_3\end{pmatrix}$, then we get

$$l_1\cdot m_1\cdot a + \cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2} \cdot b + l_2\cdot m_2\cdot c = 0.$$

Let $\mathbf{c} = \begin{pmatrix}a\\b\\c\end{pmatrix}$, then for every pair of perpendicular lines, we get a constraint on $\mathbf{c}$:

$$\begin{pmatrix}l_1\cdot m_1&\cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2}&l_2\cdot m_2\end{pmatrix}\mathbf{c} = 0.$$

We can notice that each one of these coefficients is just the element-wise average of the outer product of the first two entries of $l'$ and $m'$ and its transpose, which is what I used in my implementation. If we stack these constraints row-wise into a matrix $A$, we have an optimization problem. In particular,

$$\min_{||\mathbf{c}||=1} ||A\mathbf{c}||.$$

If we take the SVD decomposition of $A$, then $\mathbf{c}$ is just the last right singular vector (i.e. last row of $V^T$) because it is the unit vector that minimizes this equation. Now that we have $\mathbf{c}$, we can reconstruct $C_\infty^{*'}$. Also, since we know that $C_\infty^* = H C_\infty^{*'} H^T$, we can find $H$ by performing the SVD decomposition on $C_\infty^{*'}$ and get that

$$H_{\text{metric}} = \begin{pmatrix}\sqrt{\sigma_1^{-1}}&0&0\\0&\sqrt{\sigma_2^{-1}}&0\\0&0&1\end{pmatrix}U^T.$$

To undo the scaling, we need to left multiply by $S^{-1}$. Thus, the final homography after affine rectification is

$$H = S^{-1} H_{\text{metric}} S.$$

The first 8 annotations for each image were used to find the image of dual conic of the circular points.

Q3: Planar Homography from Point Correspondences¶

In [9]:
def calculate_homography_from_correspondences(src: np.ndarray, tgt: np.ndarray) -> np.ndarray:
  ## Normalization of `src`
  t = src.mean(axis=0, keepdims=True)
  r_bar = np.linalg.norm(src - t, axis=0).mean()
  s = (2 ** 0.5) / r_bar
  T = np.diag([s, s, 1])
  T[:2, 2] = -t
  
  ## Normalization of `tgt`
  t_prime = tgt.mean(axis=0, keepdims=True)
  r_prime_bar = np.linalg.norm(tgt - t_prime, axis=0).mean()
  s_prime = (2 ** 0.5) / r_prime_bar
  T_prime = np.diag([s_prime, s_prime, 1])
  T_prime[:2, 2] = -t_prime

  ## DLT
  src_homogenized = np.append(src, np.ones((len(src), 1)), axis=1)
  src_prime = src_homogenized @ T.T
  src_prime_homogenized = src_prime / src_prime[:, 2, np.newaxis]
  
  tgt_homogenized = np.append(tgt, np.ones((len(tgt), 1)), axis=1)
  tgt_prime = tgt_homogenized @ T_prime.T
  tgt_prime_homogenized = tgt_prime / tgt_prime[:, 2, np.newaxis]
  
  # Create the constraints
  A = np.zeros((2 * len(src), 9))
  
  for i, (src_prime_i, tgt_prime_i) in enumerate(zip(src_prime_homogenized, tgt_prime_homogenized)):
    A[2 * i, 3:6] = -tgt_prime_i[2] * src_prime_i
    A[2 * i, 6:9] = tgt_prime_i[1] * src_prime_i
    A[(2 * i) + 1, 0:3] = tgt_prime_i[2] * src_prime_i
    A[(2 * i) + 1, 6:9] = -tgt_prime_i[0] * src_prime_i
    
  # Get h
  _, _, Vt = np.linalg.svd(A)
  h = Vt[-1]
  
  H_tilde = h.reshape(3, 3)
  
  ## Denormalization
  H = np.linalg.inv(T_prime) @ H_tilde @ T
  
  return H

Useful Helper Functions¶

In [10]:
def compose_images_from_correspondences(src_img: np.ndarray, tgt_img: np.ndarray, tgt_points: np.ndarray, clockwise: bool = True) -> np.ndarray:
  output_img = tgt_img.copy()
  
  h, w = src_img.shape[:2]
  
  src_points = np.array([[0, 0],
                          [w - 1, 0],
                          [w - 1, h - 1],
                          [0, h - 1]]).astype(float)
  
  if not clockwise:
    src_points[[1, 3]] = src_points[[3, 1]]
  
  H = calculate_homography_from_correspondences(src_points, tgt_points)
  
  warped_src_img = cv2.warpPerspective(src_img, H, tgt_img.shape[1::-1])

  img_mask = np.ones((h, w, 3))
  warped_src_img_mask = cv2.warpPerspective(img_mask, H, tgt_img.shape[1::-1]).astype(bool)
  
  output_img[warped_src_img_mask] = 0
  output_img += warped_src_img
  
  return output_img
In [11]:
def annotate_corners(points: np.ndarray, img: np.ndarray) -> np.ndarray:
  img_copy = img.copy()
  mean_size = np.mean(img.shape[:2])
  thickness = int(0.01 * mean_size)
  
  global COLORS
  
  color = COLORS[0].tolist()
  
  for i, point in enumerate(points, start=1):
    img_copy = cv2.circle(img_copy, point.astype(int), radius=0, color=color, thickness=thickness * 3)
    img_copy = cv2.putText(img_copy, str(i), point.astype(int), cv2.FONT_HERSHEY_SIMPLEX, 2, color=color, thickness=thickness, lineType=cv2.LINE_AA)
    
  return img_copy
In [12]:
def q3(src_img_name: str, tgt_img_name: str):
  # Planar Homography from Point Correspondences
  my_annotations_path = ROOT_DIR / "data/annotation/q3_annotation.npy"
  annotations = np.load(my_annotations_path, allow_pickle=True).item()

  src_img = Image.open(ROOT_DIR / "data/q3" / src_img_name)
  src_img = np.array(src_img)
  
  tgt_img = Image.open(ROOT_DIR / "data/q3" / tgt_img_name)
  tgt_img = np.array(tgt_img)
  
  key = os.path.splitext(tgt_img_name)[0]
  tgt_points = annotations.get(key)
  
  composed_img = compose_images_from_correspondences(src_img, tgt_img, tgt_points)
  
  plt.title("Normal Image")
  plt.axis("off")
  plt.imshow(src_img)
  plt.show()
  
  plt.title("Perspective Image")
  plt.axis("off")
  plt.imshow(tgt_img)
  plt.show()
  
  plt.title("Annotated Corners in Perspective Image")
  plt.axis("off")
  plt.imshow(annotate_corners(tgt_points, tgt_img))
  plt.show()
  
  plt.title("Warped and Overlaid Image")
  plt.axis("off")
  plt.imshow(composed_img)
  plt.show()
In [13]:
image_name_tuples = [("trains.jpg", "journal.jpg"), ("desk-normal.png", "desk-perspective.png")]

for image_name_tuple in image_name_tuples:
  q3(*image_name_tuple)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

3.3¶

Essentially, I just followed the pseudocode given in class. To explain further, my implementation first normalizes the source points so that they are $\sqrt 2$ away from the origin on average and calculates the overall similarity transformation $T$. Afterwards, my implementation then does a similar normalization for the corresponding points in the target image and calculate the similarity transformation $T'$. We know that given a homography $H$, we ideally want $H\mathbf{x} \equiv \mathbf{x}'$, or in other words that $H\mathbf{x} \times \mathbf{x}' = \mathbf{0}$. From some clever identities shown during lecture, we know that for a given pair of corresponding points, we get two constraints on the flattened vector of $H$'s elements $\mathbf{h}$:

$$\begin{pmatrix}\mathbf{0}&-w'\mathbf{x}^T&y'\mathbf{x}^T\\w'\mathbf{x}^T&\mathbf{0}&-x'\mathbf{x}^T\end{pmatrix}\mathbf{h} = \mathbf{0} \qquad \text{given that } \mathbf{x}' = \begin{pmatrix}x'\\y'\\w'\end{pmatrix}.$$

After performing the similarity transformations of $T$ to the source points and $T'$ to the target points and homogenizing them, we can perform the Direct Linear Transformation (DLT) algorithm and get $\mathbf{h}$. From here, we can reshape $\mathbf{h}$ to get $\tilde{H}$ and perform the denormalization step of $(T')^{-1}$ to get the final homography:

$$H = (T')^{-1} \tilde{H} T.$$

Even though the instructions the corresponding points are annotated in anti-clockwise order, they are actually labeled in clockwise order. I added an additional parameter to compose_images_from_correspondences so that the user can specify whichever annotation order.

Q4: Bonus: Metric Rectification from Perpendicular Lines¶

In [14]:
def q4(image_name: str):
  my_annotations_path = ROOT_DIR / "data/annotation/q4_annotation.npy"
  perp_annotations = np.load(my_annotations_path, allow_pickle=True).item()
    
  key = os.path.splitext(image_name)[0]
  perp_curr_annotation = perp_annotations.get(key)
  perp_curr_points = perp_curr_annotation[:20]
  perp_test_points = perp_curr_annotation[20:]
  
  img = Image.open(ROOT_DIR / "data/q1" / image_name)
  img = np.array(img)
  
  max_size = max(img.shape[:2])
  
  H = metric_rectification(perp_curr_points, max_size)
  
  metric_rectified_img, H_final = my_warp(img, H)
  
  metric_rectified_curr_points = np.squeeze(cv2.perspectiveTransform(perp_curr_points[:, np.newaxis], H_final))
  
  plt.title("Input Image")
  plt.axis("off")
  plt.imshow(img)
  plt.show()
  
  plt.title("Annotated Perpendicular Lines")
  plt.axis("off")
  plt.imshow(plot_lines(perp_curr_points, img))
  plt.show()
  
  plt.title("Rectified Image")
  plt.axis("off")
  plt.imshow(plot_lines(metric_rectified_curr_points, metric_rectified_img))
  plt.show()
  
  test_points_prime = np.squeeze(cv2.perspectiveTransform(perp_test_points[:, np.newaxis], H))
  orig_lines = points_to_lines(perp_test_points)
  transformed_lines = points_to_lines(test_points_prime)
  
  print(f"Cosine of 1st pair of lines: {cosine(*orig_lines[:2]):.4f} -> {cosine(*transformed_lines[:2]):.4f}")
  print(f"Cosine of 2nd pair of lines: {cosine(*orig_lines[2:4]):.4f} -> {cosine(*transformed_lines[2:4]):.4f}")
  print(f"Cosine of 3rd pair of lines: {cosine(*orig_lines[4:]):.4f} -> {cosine(*transformed_lines[4:]):.4f}")
  
  plt.title("Test Lines on Input Image")
  plt.axis("off")
  plt.imshow(plot_lines(perp_test_points, img))
  plt.show()
  
  metric_rectified_test_points = np.squeeze(cv2.perspectiveTransform(perp_test_points[:, np.newaxis], H_final))
  
  plt.title("Test Lines on Metric-Rectified Image")
  plt.axis("off")
  plt.imshow(plot_lines(metric_rectified_test_points, metric_rectified_img))
  plt.show()
  
In [15]:
image_names = ["papers.jpg", "checker1.jpg"]

for i, image_name in enumerate(image_names):
  q4(image_name)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: -0.3933 -> -0.0998
Cosine of 2nd pair of lines: -0.6658 -> -0.0887
Cosine of 3rd pair of lines: -0.5957 -> -0.0441
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Cosine of 1st pair of lines: -0.6192 -> -0.1122
Cosine of 2nd pair of lines: 0.4841 -> 0.0304
Cosine of 3rd pair of lines: -0.4757 -> -0.0718
No description has been provided for this image
No description has been provided for this image

4.4¶

Much of my implementation is the same as that for Q2, just that we know that the image of the dual conic doesn't necessarily have that $d = e = f = 0$. So

$$C_\infty^{*'} = \begin{pmatrix}a&\cfrac{b}{2}&\cfrac{d}{2}\\\cfrac{b}{2}&c&\cfrac{e}{2}\\\cfrac{d}{2}&\cfrac{e}{2}&f\end{pmatrix}.$$

If $l' = \begin{pmatrix}l_1\\l_2\\l_3\end{pmatrix}$ and $m' = \begin{pmatrix}m_1\\m_2\\m_3\end{pmatrix}$, then we get

$$l_1\cdot m_1\cdot a + \cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2} \cdot b + l_2\cdot m_2\cdot c + \cfrac{l_1\cdot m_3 + l_3\cdot m_1}{2}\cdot d + \cfrac{l_2\cdot m_3 + l_3\cdot m_2}{2}\cdot e + l_3\cdot m_3\cdot f= 0.$$

Let $\mathbf{c} = \begin{pmatrix}a\\b\\c\\d\\e\\f\end{pmatrix}$, then for every pair of perpendicular lines, we get a constraint on $\mathbf{c}$:

$$\begin{pmatrix}l_1\cdot m_1&\cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2}&l_2\cdot m_2&\cfrac{l_1\cdot m_3 + l_3\cdot m_1}{2}&\cfrac{l_2\cdot m_3 + l_3\cdot m_2}{2}&l_3\cdot m_3\end{pmatrix}\mathbf{c} = 0.$$

We can notice that each one of these coefficients is just the element-wise average of the outer product of $l'$ and $m'$ and its transpose, which is what I used in my implementation. One practical difference though is that it is difficult to index into the upper triangle of the outer product of $l'$ and $m'$ in this order. So instead I let $\mathbf{c} = \begin{pmatrix}a\\b\\d\\c\\e\\f\end{pmatrix}$. So the constraints are instead

$$\begin{pmatrix}l_1\cdot m_1&\cfrac{l_1\cdot m_2 + l_2\cdot m_1}{2}&\cfrac{l_1\cdot m_3 + l_3\cdot m_1}{2}&l_2\cdot m_2&\cfrac{l_2\cdot m_3 + l_3\cdot m_2}{2}&l_3\cdot m_3\end{pmatrix}\mathbf{c} = 0.$$

If we stack these constraints row-wise into a matrix $A$, we have an optimization problem. In particular,

$$\min_{||\mathbf{c}||=1} ||A\mathbf{c}||.$$

If we take the SVD decomposition of $A$, then $\mathbf{c}$ is just the last right singular vector (i.e. last row of $V^T$) because it is the unit vector that minimizes this equation. Now that we have $\mathbf{c}$, we can reconstruct $C_\infty^{*'}$. Also, since we know that $C_\infty^* = H C_\infty^{*'} H^T$, we can find $H$ by performing the SVD decomposition on $C_\infty^{*'}$ and get that

$$H_{\text{metric}} = \begin{pmatrix}\sqrt{\sigma_1^{-1}}&0&0\\0&\sqrt{\sigma_2^{-1}}&0\\0&0&1\end{pmatrix}U^T.$$

To undo the scaling, we need to left multiply by $S^{-1}$. Thus, the final homography after affine rectification is

$$H = S^{-1} H_{\text{metric}} S.$$

The first 20 annotations for each image were used to find the image of dual conic of the circular points.

Q5: Bonus: More Planar Homography from Point Correspondences¶

In [16]:
def q5():
  my_annotations_path = ROOT_DIR / "data/annotation/q5_annotation.npy"
  annotations = np.load(my_annotations_path, allow_pickle=True).item()
    
  src_img_names = ["trains.jpg", "bongo.jpg", "nyan.png"]
  
  tgt_img_name = "times_square.jpg"
  
  tgt_img = Image.open(ROOT_DIR / "data/q3" / tgt_img_name)
  tgt_img = np.array(tgt_img)
  
  plt.title("Target Image")
  plt.axis("off")
  plt.imshow(tgt_img)
  plt.show()
  
  key = os.path.splitext(tgt_img_name)[0]
  all_tgt_points = annotations.get(key)
  
  composed_img = tgt_img.copy()
  
  fig, axs = plt.subplots(1, 3)
  
  fig.suptitle("Input Images")
  
  for i, src_img_name in enumerate(src_img_names):
    src_img = Image.open(ROOT_DIR / "data/q3" / src_img_name)
    src_img = np.array(src_img)
    
    axs[i].axis("off")
    axs[i].imshow(src_img)
    
    tgt_points = all_tgt_points[4 * i:4 * (i + 1)]
  
    composed_img = compose_images_from_correspondences(src_img, composed_img, tgt_points)
  
  plt.show()
  
  plt.title("Composed Image From Multiple Point Correspondences")
  plt.axis("off")
  plt.imshow(composed_img)
  plt.show()
In [17]:
q5()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

5.3¶

For this question, I essentially just used compose_images_from_correspondences that I wrote for Q3 and used that to add the corresponding images to the target image. I annotated the billboards in a clockwise manner.

Annotation Code¶

For completeness, the script that I used to annotate the images is annotate.py.