HW3: 3D reconstruction¶

Setup¶

In [1]:
%matplotlib inline
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

from typing import Tuple
from tqdm import tqdm
from PIL import Image

np.set_printoptions(suppress=True, precision=4)
In [21]:
def homogenize(pts: np.ndarray) -> np.ndarray:
  return np.append(pts, np.ones((len(pts), 1)), axis=1)

Q1: 8-point and 7-point algorithm¶

(A1) F matrix using 8-point algorithm¶

In [22]:
def f_8_pt(pts1: np.ndarray, pts2: np.ndarray) -> np.ndarray:
  mean_1 = pts1.mean(axis=0)
  d_1 = np.linalg.norm(pts1 - mean_1, axis=1).mean()
  s_1 = np.sqrt(2) / d_1
  T = np.diag([s_1, s_1, 1])
  T[:2, 2] = -s_1 * mean_1
  
  pts1_homogenized = homogenize(pts1)
  pts1_normalized = pts1_homogenized @ T.T
  
  mean_2 = pts2.mean(axis=0)
  d_2 = np.linalg.norm(pts2 - mean_2, axis=1).mean()
  s_2 = np.sqrt(2) / d_2
  T_prime = np.diag([s_2, s_2, 1])
  T_prime[:2, 2] = -s_2 * mean_2
  
  pts2_homogenized = homogenize(pts2)
  pts2_normalized = pts2_homogenized @ T_prime.T
  
  u_prime = pts2_normalized[:, 0]
  v_prime = pts2_normalized[:, 1]
  u = pts1_normalized[:, 0]
  v = pts1_normalized[:, 1]
  
  A = np.column_stack([u_prime * u, u_prime * v, u_prime,
                       v_prime * u, v_prime * v, v_prime,
                       u, v, np.ones_like(v)])
    
  # Get f
  _, _, Vt = np.linalg.svd(A)
  f = Vt[-1]
  
  F = f.reshape(3, -1)
  
  U, S, Vt = np.linalg.svd(F)
  S[-1] = 0
  
  F_star = U @ np.diag(S) @ Vt
  
  F_final = T_prime.T @ F_star @ T
  F_final /= F_final[-1, -1]
  
  return F_final
In [23]:
def plot_epipolar_lines(F: np.ndarray,
                        pts: np.ndarray,
                        img1: np.ndarray,
                        img2: np.ndarray):
  colors = []
  epipolar_lines = []
  
  for pt in pts:
    color = np.random.randint(255, size=3)
    colors.append(color.tolist())
    l = F @ np.append(pt, 1)
    min_x = 0
    max_x = img2.shape[1]
    
    min_y = int((-l[0] * min_x - l[2]) / l[1])
    max_y = int((-l[0] * max_x - l[2]) / l[1])
    
    epipolar_lines.append(((min_x, min_y), (max_x, max_y)))
    
  annotated_img1 = img1.copy()
  annotated_img2 = img2.copy()
  
  for color, pt, epipolar_line in zip(colors, pts, epipolar_lines):
    annotated_img1 = cv2.circle(annotated_img1, pt.astype(int), 10, color, -1)
    annotated_img2 = cv2.line(annotated_img2, *epipolar_line, color, 5)
    
  plt.title("Viewpoint 1")
  plt.axis("off")
  plt.imshow(annotated_img1)
  plt.show()

  plt.title("Viewpoint 2")
  plt.axis("off")
  plt.imshow(annotated_img2)
  plt.show()
In [24]:
def q1a1(img_dir: str):
  common_root = f"data/q1a/{img_dir}"
  corresp = np.load(f"{common_root}/corresp.npz", allow_pickle=True)
  
  pts1 = corresp["pts1"]
  pts2 = corresp["pts2"]
  
  F = f_8_pt(pts1, pts2)
  
  img1 = np.array(Image.open(f"{common_root}/image1.jpg"))
  img2 = np.array(Image.open(f"{common_root}/image2.jpg"))
  
  plot_epipolar_lines(F, pts1, img1, img2)
  print(F)
In [25]:
img_dirs = ["bench", "remote"]

for img_dir in img_dirs:
  q1a1(img_dir)
No description has been provided for this image
No description has been provided for this image
[[-0.     -0.      0.0002]
 [-0.      0.      0.0249]
 [-0.0011 -0.023   1.    ]]
No description has been provided for this image
No description has been provided for this image
[[ 0.     -0.      0.0024]
 [-0.     -0.     -0.0085]
 [-0.004   0.0095  1.    ]]

My implementation first normalizes each set of points so that they are mean centered and on average unit distance away. Then I use the relation that $x_i'^TFx_i = 0$ to build the constraint matrix. I enforce that $F$ is rank two by manually setting the last singular value to 0 and then perform the reverse transformations back to pixel space.

(A2) E matrix using 8-point algorithm¶

In [26]:
def q1a2(img_dir: str):
  common_root = f"data/q1a/{img_dir}"
  corresp = np.load(f"{common_root}/corresp.npz", allow_pickle=True)
  intrinsics = np.load(f"{common_root}/intrinsics.npz", allow_pickle=True)
  
  pts1 = corresp["pts1"]
  pts2 = corresp["pts2"]
  
  K1 = intrinsics["K1"]
  K2 = intrinsics["K2"]
  
  F = f_8_pt(pts1, pts2)
  
  E = K2.T @ F @ K1
  E /= E[-1, -1]
  
  print(E)
In [27]:
img_dirs = ["bench", "remote"]

for img_dir in img_dirs:
  q1a2(img_dir)
[[ -0.4152  -8.8175  -2.1506]
 [ -1.3804   0.7907  69.0827]
 [ -3.5889 -68.4744   1.    ]]
[[  2.5298  -0.6706   7.3309]
 [ -6.6203  -3.0277 -28.2986]
 [-14.5007  26.4182   1.    ]]

Since $E$ is related to $F$ by $E = K'TFK$, I just performed that calculation to compute $E$.

(B) 7-point algorithm¶

In [28]:
def f_7_pt(pts1: np.ndarray, pts2: np.ndarray) -> np.ndarray:
  mean_1 = pts1.mean(axis=0)
  d_1 = np.linalg.norm(pts1 - mean_1, axis=1).mean()
  s_1 = np.sqrt(2) / d_1
  T = np.diag([s_1, s_1, 1])
  T[:2, 2] = -s_1 * mean_1
  
  pts1_homogenized = homogenize(pts1)
  pts1_normalized = pts1_homogenized @ T.T
  
  mean_2 = pts2.mean(axis=0)
  d_2 = np.linalg.norm(pts2 - mean_2, axis=1).mean()
  s_2 = np.sqrt(2) / d_2
  T_prime = np.diag([s_2, s_2, 1])
  T_prime[:2, 2] = -s_2 * mean_2
  
  pts2_homogenized = homogenize(pts2)
  pts2_normalized = pts2_homogenized @ T_prime.T
  
  A = np.zeros((7, 9))
  
  u_prime = pts2_normalized[:, 0]
  v_prime = pts2_normalized[:, 1]
  u = pts1_normalized[:, 0]
  v = pts1_normalized[:, 1]
  
  A = np.column_stack([u_prime * u, u_prime * v, u_prime,
                       v_prime * u, v_prime * v, v_prime,
                       u, v, np.ones_like(v)])
    
  # Get f
  _, _, Vt = np.linalg.svd(A)
  f1 = Vt[-2]
  f2 = Vt[-1]
  
  F1 = f1.reshape(3, -1)
  F2 = f2.reshape(3, -1)
  
  polynomial = lambda a: a * F1 + (1 - a) * F2
  
  c = np.linalg.det(list(map(polynomial, range(4))))
  
  A = np.array([[0, 0, 0, 1],
                [1, 1, 1, 1],
                [8, 4, 2, 1],
                [27, 9, 3, 1]])
  coefficients = np.linalg.solve(A, c)
  
  a_roots = np.roots(coefficients)

  real_roots = list(map(lambda x: x.real, filter(lambda x: np.isclose(x.imag, 0), a_roots)))
  
  Fs_final = []
  
  for real_root in real_roots:
    interpolated_F = polynomial(real_root)
    F_final = T_prime.T @ interpolated_F @ T
    F_final /= F_final[-1, -1]
    Fs_final.append(F_final)
  
  return np.stack(Fs_final)
In [29]:
def q1b(img_dir: str):
  common_root = f"data/q1b/{img_dir}"
  corresp = np.load(f"{common_root}/corresp.npz", allow_pickle=True)
  
  pts1 = corresp["pts1"]
  pts2 = corresp["pts2"]
  
  Fs = f_7_pt(pts1, pts2)
  
  img1 = np.array(Image.open(f"{common_root}/image1.jpg"))
  img2 = np.array(Image.open(f"{common_root}/image2.jpg"))
  
  for F in Fs:
    plot_epipolar_lines(F, pts1, img1, img2)
    print(F)
In [30]:
img_dirs = [
  "ball", 
  "hydrant"
  ]

for img_dir in img_dirs:
  q1b(img_dir)
No description has been provided for this image
No description has been provided for this image
[[-0.     -0.     -0.0067]
 [ 0.     -0.     -0.015 ]
 [ 0.0091  0.0149  1.    ]]
No description has been provided for this image
No description has been provided for this image
[[ 0.      0.     -0.0086]
 [-0.      0.      0.0077]
 [ 0.0063 -0.0119  1.    ]]
No description has been provided for this image
No description has been provided for this image
[[ 0.     -0.     -0.0014]
 [ 0.      0.     -0.0167]
 [ 0.0011  0.0164  1.    ]]
No description has been provided for this image
No description has been provided for this image
[[ 0.      0.     -0.0067]
 [-0.      0.      0.0014]
 [ 0.005  -0.0046  1.    ]]

For my implementation of the 7-point algorithm, I did the point normalization, constructed the same constraints as with the 8-point algorithm, then found the coefficients of the polynomial $\det(\lambda F_1 + (1-\lambda)F_2)$ by evaluating the polynomial at $\lambda=0,1,2,3$. I return all of the real valued roots of the polynomial and create the corresponding $F$ matrices based off of those roots. I manually inspect the epipolar lines to determine which is the correct one (if there are multiple real roots). For the hydrant, the second solution seems correct given the difference in views.

Q2: RANSAC with 7-point and 8-point algorithm¶

In [31]:
def calculate_errors(F: np.ndarray, pts1: np.ndarray, pts2: np.ndarray) -> np.ndarray:
  pts1 = homogenize(pts1)
  pts2 = homogenize(pts2)
  
  l1 = pts1 @ F.T
  l2 = pts2 @ F
  
  d1 = np.abs((l1 * pts2).sum(axis=1)) / np.linalg.norm(l1[:, :2], axis=1)
  d2 = np.abs((l2 * pts1).sum(axis=1)) / np.linalg.norm(l2[:, :2], axis=1)
  
  return d1 + d2
In [32]:
def ransac(k: int, t: float, pts1: np.ndarray, pts2: np.ndarray, img1: np.ndarray, img2: np.ndarray, show_inlier_graph: bool = True, show_best_f: bool = False):
  d_min_8pt = d_min_7pt = 0
  F_best_8pt = F_best_7pt = None

  inlier_8pts = None
  inlier_7pts = None
  
  percent_inliers_8pt = []
  percent_inliers_7pt = []
  
  N = len(pts1)
  
  for _ in tqdm(range(k)):
    random_8pts = np.random.choice(N, 8, replace=False)
    
    F_8pt = f_8_pt(pts1[random_8pts], pts2[random_8pts])
    
    error_8pt = calculate_errors(F_8pt, pts1, pts2)
    
    d_8pt = (error_8pt <= t).sum()
    
    if d_8pt > d_min_8pt:
      inlier_8pts = pts1[error_8pt <= t]
      d_min_8pt = d_8pt
      F_best_8pt = F_8pt
      
    percent_inliers_8pt.append(d_min_8pt / N)
    
    random_7pts = np.random.choice(N, 7, replace=False)
    
    Fs_7pt = f_7_pt(pts1[random_7pts], pts2[random_7pts])
    
    for F_7pt in Fs_7pt:
      error_7pt = calculate_errors(F_7pt, pts1, pts2)
      
      d_7pt = (error_7pt <= t).sum()
    
      if d_7pt > d_min_7pt:
        inlier_7pts = pts1[error_7pt <= t]
        d_min_7pt = d_7pt
        F_best_7pt = F_7pt
        
    percent_inliers_7pt.append(d_min_7pt / N)
  
  if not show_best_f:
    plot_epipolar_lines(F_best_8pt, inlier_8pts, img1, img2)
    print(F_best_8pt)
    plot_epipolar_lines(F_best_7pt, inlier_7pts, img1, img2)
    print(F_best_7pt)
  else:
    if inlier_8pts.shape[0] > inlier_7pts.shape[0]:
      plot_epipolar_lines(F_best_8pt, inlier_8pts, img1, img2)
      print(F_best_8pt)
    else:
      plot_epipolar_lines(F_best_7pt, inlier_7pts, img1, img2)
      print(F_best_7pt)
      
  if show_inlier_graph:
    plt.plot(percent_inliers_8pt, label="8-point")
    plt.plot(percent_inliers_7pt, label="7-point")
    plt.xlabel("iterations")
    plt.ylabel("Percent Inliers")
    plt.legend(loc="lower right")
    plt.show()
In [33]:
def q2(img_dir: str):
  common_root = f"data/{img_dir}"
  corresp = np.load(f"{common_root}/corresp_noisy.npz", allow_pickle=True)
  
  pts1 = corresp["pts1"]
  pts2 = corresp["pts2"]
  
  img1 = np.array(Image.open(f"{common_root}/image1.jpg"))
  img2 = np.array(Image.open(f"{common_root}/image2.jpg"))
  
  ransac(k=10000, t=2, pts1=pts1, pts2=pts2, img1=img1, img2=img2)
In [34]:
img_dirs = ["q1a/bench", "q1a/remote", "q1b/ball", "q1b/hydrant"]

for img_dir in img_dirs:
  q2(img_dir)
100%|██████████| 10000/10000 [00:08<00:00, 1147.95it/s]
No description has been provided for this image
No description has been provided for this image
[[-0.     -0.     -0.0001]
 [-0.      0.      0.0236]
 [-0.0008 -0.0221  1.    ]]
No description has been provided for this image
No description has been provided for this image
[[-0.     -0.      0.0001]
 [-0.      0.      0.0244]
 [-0.001  -0.0227  1.    ]]
No description has been provided for this image
  0%|          | 0/10000 [00:00<?, ?it/s]/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:8: RuntimeWarning: divide by zero encountered in divide
  d1 = np.abs((l1 * pts2).sum(axis=1)) / np.linalg.norm(l1[:, :2], axis=1)
  5%|▌         | 515/10000 [00:00<00:03, 2655.20it/s]/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:9: RuntimeWarning: divide by zero encountered in divide
  d2 = np.abs((l2 * pts1).sum(axis=1)) / np.linalg.norm(l2[:, :2], axis=1)
/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:9: RuntimeWarning: invalid value encountered in divide
  d2 = np.abs((l2 * pts1).sum(axis=1)) / np.linalg.norm(l2[:, :2], axis=1)
 30%|██▉       | 2980/10000 [00:01<00:02, 3087.51it/s]/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:8: RuntimeWarning: invalid value encountered in divide
  d1 = np.abs((l1 * pts2).sum(axis=1)) / np.linalg.norm(l1[:, :2], axis=1)
100%|██████████| 10000/10000 [00:03<00:00, 3125.39it/s]
No description has been provided for this image
No description has been provided for this image
[[-0.      0.0001 -0.0045]
 [-0.0001 -0.     -0.0333]
 [ 0.0023  0.0569  1.    ]]
No description has been provided for this image
No description has been provided for this image
[[ 0.     -0.      0.0035]
 [ 0.     -0.     -0.0102]
 [-0.006   0.0121  1.    ]]
No description has been provided for this image
100%|██████████| 10000/10000 [00:07<00:00, 1280.14it/s]
No description has been provided for this image
No description has been provided for this image
[[-0.     -0.     -0.0057]
 [ 0.     -0.     -0.0148]
 [ 0.008   0.0149  1.    ]]
No description has been provided for this image
No description has been provided for this image
[[-0.     -0.     -0.0065]
 [ 0.     -0.     -0.0146]
 [ 0.0088  0.0145  1.    ]]
No description has been provided for this image
100%|██████████| 10000/10000 [00:09<00:00, 1079.96it/s]
No description has been provided for this image
No description has been provided for this image
[[-0.     -0.      0.0009]
 [ 0.     -0.     -0.0226]
 [-0.0014  0.0233  1.    ]]
No description has been provided for this image
No description has been provided for this image
[[ 0.     -0.     -0.0011]
 [ 0.     -0.     -0.017 ]
 [ 0.0008  0.0168  1.    ]]
No description has been provided for this image

In my implementation of RANSAC, I sample 8 or 7 random point correspondences, then calculate the distances of the points to the epipolar lines. If there are multiple real roots for the 7 point algorithm, I loop through all of them.

Q3: Triangulation¶

In [35]:
def triangulate(img1: np.ndarray,
                img2: np.ndarray,
                P1: np.ndarray,
                P2: np.ndarray,
                pts1: np.ndarray,
                pts2: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
  
  pts = []
  colors = []
  
  above_main_diag_indices = np.triu_indices(3, 1)
  
  for pt1, pt2 in zip(pts1, pts2):
    A = np.zeros((4, 4))
    
    pt1_cross_entries = np.append(pt1.copy(), 1)[::-1]
    pt1_cross_entries[::2] *= -1
    pt1_cross_matrix = np.zeros((3, 3))
    pt1_cross_matrix[above_main_diag_indices] = pt1_cross_entries
    pt1_cross_matrix -= pt1_cross_matrix.T
    
    A[:2] = (pt1_cross_matrix @ P1)[:2]
    
    pt2_cross_entries = np.append(pt2.copy(), 1)[::-1]
    pt2_cross_entries[::2] *= -1
    pt2_cross_matrix = np.zeros((3, 3))
    pt2_cross_matrix[above_main_diag_indices] = pt2_cross_entries
    pt2_cross_matrix -= pt2_cross_matrix.T
    
    A[2:] = (pt2_cross_matrix @ P2)[:2]
    
    _, _, Vt = np.linalg.svd(A)
    X = Vt[-1]
    X /= X[-1]
    
    pts.append(X[:3])
    
    img1_color = img1[tuple(pt1[::-1])].astype(float)
    img2_color = img2[tuple(pt2[::-1])].astype(float)
    
    colors.append((img1_color + img2_color) / 2)
  
  pts = np.stack(pts)
  colors = np.stack(colors)
  
  return pts, colors
In [36]:
def q3():
  img1 = np.array(Image.open("data/q3/img1.jpg"))
  img2 = np.array(Image.open("data/q3/img2.jpg"))

  P1 = np.load("data/q3/P1.npy")
  P2 = np.load("data/q3/P2.npy")

  pts1 = np.load("data/q3/pts1.npy")
  pts2 = np.load("data/q3/pts2.npy")

  pts, colors = triangulate(img1, img2, P1, P2, pts1, pts2)

  fig = plt.figure()
  ax = fig.add_subplot(projection='3d')
  
  ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2], c=colors / 255., s=0.2)
  ax.view_init(azim=-100, elev=-60)
  plt.show()
In [37]:
q3()
No description has been provided for this image

To triangulate, I created the constraints using the relations $x \times PX = 0$ and $x' \times P'X = 0$ for each pair of corresponding points $x, x'$. I average the colors of the corresponding pixels and just plot the point cloud from there.

Q4: Reconstruct your own scene!¶

In [42]:
def q4(img_dir: str):
  images = list(os.scandir(f"data/q4/{img_dir}"))[:6]
  num_columns = int(np.ceil(len(images) / 2))
  _, axs = plt.subplots(2, num_columns)
  axs = axs.flatten()
  for img, ax in zip(images, axs):
      img = np.array(Image.open(img.path))
      ax.set_axis_off()
      ax.imshow(img)
      
  if len(images) % 2:
    axs[-1].remove()
  
  plt.show()
In [43]:
image_dirs = ["bag", "mug2"]

for image_dir in image_dirs:
  q4(image_dir)
No description has been provided for this image
No description has been provided for this image

Bag reconstruction

Mug reconstruction

Q5: Bonus 1 - Fundamental matrix estimation on your own images.¶

In [40]:
def q5(img_dir: str):
  common_root = f"data/q4/{img_dir}"
  img1 = cv2.imread(f"{common_root}/image_1.jpg")
  gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
  img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
  sift1 = cv2.xfeatures2d.SIFT_create()
  kp1, des1 = sift1.detectAndCompute(gray1, None)
  
  img2 = cv2.imread(f"{common_root}/image_2.jpg")
  gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
  img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
  sift2 = cv2.xfeatures2d.SIFT_create()
  kp2, des2 = sift2.detectAndCompute(gray2, None)

  bf = cv2.BFMatcher()
  matches = bf.knnMatch(des1, des2, k=2)
  
  good = []
  for m, n in matches:
    if m.distance < 0.75 * n.distance:
      good.append(m)
      
  pts1 = []
  pts2 = []
  
  for m in good:
    pts1.append(kp1[m.queryIdx].pt)
    pts2.append(kp2[m.trainIdx].pt)
          
  pts1 = np.array(pts1)
  pts2 = np.array(pts2)
  
  ransac(k=10000, t=1.4, pts1=pts1, pts2=pts2, img1=img1, img2=img2, show_inlier_graph=False, show_best_f=True)
In [41]:
img_dirs = ["barstool", "mug"]

for img_dir in img_dirs:
  q5(img_dir)
 66%|██████▌   | 6580/10000 [00:03<00:01, 1793.90it/s]/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:9: RuntimeWarning: divide by zero encountered in divide
  d2 = np.abs((l2 * pts1).sum(axis=1)) / np.linalg.norm(l2[:, :2], axis=1)
100%|██████████| 10000/10000 [00:05<00:00, 1757.60it/s]
No description has been provided for this image
No description has been provided for this image
[[ 0.     -0.     -0.0005]
 [-0.     -0.      0.0013]
 [-0.     -0.0007  1.    ]]
100%|██████████| 10000/10000 [00:04<00:00, 2206.93it/s]
No description has been provided for this image
No description has been provided for this image
[[ 0.      0.     -0.001 ]
 [ 0.     -0.     -0.0023]
 [ 0.0008  0.0019  1.    ]]

I used the SIFT features from the grayscale images to compute keypoints and descriptors. Afterwards, I used a brute force matcher and specifically kNN to match the descriptors together. After filtering to get the good matches, I run RANSAC and just plotted whichever algorithm gave more inliers and output the fundamental matrix $F$.