%matplotlib inline
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
from typing import Tuple
from tqdm import tqdm
from PIL import Image
np.set_printoptions(suppress=True, precision=4)
def homogenize(pts: np.ndarray) -> np.ndarray:
return np.append(pts, np.ones((len(pts), 1)), axis=1)
def f_8_pt(pts1: np.ndarray, pts2: np.ndarray) -> np.ndarray:
mean_1 = pts1.mean(axis=0)
d_1 = np.linalg.norm(pts1 - mean_1, axis=1).mean()
s_1 = np.sqrt(2) / d_1
T = np.diag([s_1, s_1, 1])
T[:2, 2] = -s_1 * mean_1
pts1_homogenized = homogenize(pts1)
pts1_normalized = pts1_homogenized @ T.T
mean_2 = pts2.mean(axis=0)
d_2 = np.linalg.norm(pts2 - mean_2, axis=1).mean()
s_2 = np.sqrt(2) / d_2
T_prime = np.diag([s_2, s_2, 1])
T_prime[:2, 2] = -s_2 * mean_2
pts2_homogenized = homogenize(pts2)
pts2_normalized = pts2_homogenized @ T_prime.T
u_prime = pts2_normalized[:, 0]
v_prime = pts2_normalized[:, 1]
u = pts1_normalized[:, 0]
v = pts1_normalized[:, 1]
A = np.column_stack([u_prime * u, u_prime * v, u_prime,
v_prime * u, v_prime * v, v_prime,
u, v, np.ones_like(v)])
# Get f
_, _, Vt = np.linalg.svd(A)
f = Vt[-1]
F = f.reshape(3, -1)
U, S, Vt = np.linalg.svd(F)
S[-1] = 0
F_star = U @ np.diag(S) @ Vt
F_final = T_prime.T @ F_star @ T
F_final /= F_final[-1, -1]
return F_final
def plot_epipolar_lines(F: np.ndarray,
pts: np.ndarray,
img1: np.ndarray,
img2: np.ndarray):
colors = []
epipolar_lines = []
for pt in pts:
color = np.random.randint(255, size=3)
colors.append(color.tolist())
l = F @ np.append(pt, 1)
min_x = 0
max_x = img2.shape[1]
min_y = int((-l[0] * min_x - l[2]) / l[1])
max_y = int((-l[0] * max_x - l[2]) / l[1])
epipolar_lines.append(((min_x, min_y), (max_x, max_y)))
annotated_img1 = img1.copy()
annotated_img2 = img2.copy()
for color, pt, epipolar_line in zip(colors, pts, epipolar_lines):
annotated_img1 = cv2.circle(annotated_img1, pt.astype(int), 10, color, -1)
annotated_img2 = cv2.line(annotated_img2, *epipolar_line, color, 5)
plt.title("Viewpoint 1")
plt.axis("off")
plt.imshow(annotated_img1)
plt.show()
plt.title("Viewpoint 2")
plt.axis("off")
plt.imshow(annotated_img2)
plt.show()
def q1a1(img_dir: str):
common_root = f"data/q1a/{img_dir}"
corresp = np.load(f"{common_root}/corresp.npz", allow_pickle=True)
pts1 = corresp["pts1"]
pts2 = corresp["pts2"]
F = f_8_pt(pts1, pts2)
img1 = np.array(Image.open(f"{common_root}/image1.jpg"))
img2 = np.array(Image.open(f"{common_root}/image2.jpg"))
plot_epipolar_lines(F, pts1, img1, img2)
print(F)
img_dirs = ["bench", "remote"]
for img_dir in img_dirs:
q1a1(img_dir)
[[-0. -0. 0.0002] [-0. 0. 0.0249] [-0.0011 -0.023 1. ]]
[[ 0. -0. 0.0024] [-0. -0. -0.0085] [-0.004 0.0095 1. ]]
My implementation first normalizes each set of points so that they are mean centered and on average unit distance away. Then I use the relation that $x_i'^TFx_i = 0$ to build the constraint matrix. I enforce that $F$ is rank two by manually setting the last singular value to 0 and then perform the reverse transformations back to pixel space.
(A2) E matrix using 8-point algorithm¶
def q1a2(img_dir: str):
common_root = f"data/q1a/{img_dir}"
corresp = np.load(f"{common_root}/corresp.npz", allow_pickle=True)
intrinsics = np.load(f"{common_root}/intrinsics.npz", allow_pickle=True)
pts1 = corresp["pts1"]
pts2 = corresp["pts2"]
K1 = intrinsics["K1"]
K2 = intrinsics["K2"]
F = f_8_pt(pts1, pts2)
E = K2.T @ F @ K1
E /= E[-1, -1]
print(E)
img_dirs = ["bench", "remote"]
for img_dir in img_dirs:
q1a2(img_dir)
[[ -0.4152 -8.8175 -2.1506] [ -1.3804 0.7907 69.0827] [ -3.5889 -68.4744 1. ]] [[ 2.5298 -0.6706 7.3309] [ -6.6203 -3.0277 -28.2986] [-14.5007 26.4182 1. ]]
Since $E$ is related to $F$ by $E = K'TFK$, I just performed that calculation to compute $E$.
(B) 7-point algorithm¶
def f_7_pt(pts1: np.ndarray, pts2: np.ndarray) -> np.ndarray:
mean_1 = pts1.mean(axis=0)
d_1 = np.linalg.norm(pts1 - mean_1, axis=1).mean()
s_1 = np.sqrt(2) / d_1
T = np.diag([s_1, s_1, 1])
T[:2, 2] = -s_1 * mean_1
pts1_homogenized = homogenize(pts1)
pts1_normalized = pts1_homogenized @ T.T
mean_2 = pts2.mean(axis=0)
d_2 = np.linalg.norm(pts2 - mean_2, axis=1).mean()
s_2 = np.sqrt(2) / d_2
T_prime = np.diag([s_2, s_2, 1])
T_prime[:2, 2] = -s_2 * mean_2
pts2_homogenized = homogenize(pts2)
pts2_normalized = pts2_homogenized @ T_prime.T
A = np.zeros((7, 9))
u_prime = pts2_normalized[:, 0]
v_prime = pts2_normalized[:, 1]
u = pts1_normalized[:, 0]
v = pts1_normalized[:, 1]
A = np.column_stack([u_prime * u, u_prime * v, u_prime,
v_prime * u, v_prime * v, v_prime,
u, v, np.ones_like(v)])
# Get f
_, _, Vt = np.linalg.svd(A)
f1 = Vt[-2]
f2 = Vt[-1]
F1 = f1.reshape(3, -1)
F2 = f2.reshape(3, -1)
polynomial = lambda a: a * F1 + (1 - a) * F2
c = np.linalg.det(list(map(polynomial, range(4))))
A = np.array([[0, 0, 0, 1],
[1, 1, 1, 1],
[8, 4, 2, 1],
[27, 9, 3, 1]])
coefficients = np.linalg.solve(A, c)
a_roots = np.roots(coefficients)
real_roots = list(map(lambda x: x.real, filter(lambda x: np.isclose(x.imag, 0), a_roots)))
Fs_final = []
for real_root in real_roots:
interpolated_F = polynomial(real_root)
F_final = T_prime.T @ interpolated_F @ T
F_final /= F_final[-1, -1]
Fs_final.append(F_final)
return np.stack(Fs_final)
def q1b(img_dir: str):
common_root = f"data/q1b/{img_dir}"
corresp = np.load(f"{common_root}/corresp.npz", allow_pickle=True)
pts1 = corresp["pts1"]
pts2 = corresp["pts2"]
Fs = f_7_pt(pts1, pts2)
img1 = np.array(Image.open(f"{common_root}/image1.jpg"))
img2 = np.array(Image.open(f"{common_root}/image2.jpg"))
for F in Fs:
plot_epipolar_lines(F, pts1, img1, img2)
print(F)
img_dirs = [
"ball",
"hydrant"
]
for img_dir in img_dirs:
q1b(img_dir)
[[-0. -0. -0.0067] [ 0. -0. -0.015 ] [ 0.0091 0.0149 1. ]]
[[ 0. 0. -0.0086] [-0. 0. 0.0077] [ 0.0063 -0.0119 1. ]]
[[ 0. -0. -0.0014] [ 0. 0. -0.0167] [ 0.0011 0.0164 1. ]]
[[ 0. 0. -0.0067] [-0. 0. 0.0014] [ 0.005 -0.0046 1. ]]
For my implementation of the 7-point algorithm, I did the point normalization, constructed the same constraints as with the 8-point algorithm, then found the coefficients of the polynomial $\det(\lambda F_1 + (1-\lambda)F_2)$ by evaluating the polynomial at $\lambda=0,1,2,3$. I return all of the real valued roots of the polynomial and create the corresponding $F$ matrices based off of those roots. I manually inspect the epipolar lines to determine which is the correct one (if there are multiple real roots). For the hydrant, the second solution seems correct given the difference in views.
Q2: RANSAC with 7-point and 8-point algorithm¶
def calculate_errors(F: np.ndarray, pts1: np.ndarray, pts2: np.ndarray) -> np.ndarray:
pts1 = homogenize(pts1)
pts2 = homogenize(pts2)
l1 = pts1 @ F.T
l2 = pts2 @ F
d1 = np.abs((l1 * pts2).sum(axis=1)) / np.linalg.norm(l1[:, :2], axis=1)
d2 = np.abs((l2 * pts1).sum(axis=1)) / np.linalg.norm(l2[:, :2], axis=1)
return d1 + d2
def ransac(k: int, t: float, pts1: np.ndarray, pts2: np.ndarray, img1: np.ndarray, img2: np.ndarray, show_inlier_graph: bool = True, show_best_f: bool = False):
d_min_8pt = d_min_7pt = 0
F_best_8pt = F_best_7pt = None
inlier_8pts = None
inlier_7pts = None
percent_inliers_8pt = []
percent_inliers_7pt = []
N = len(pts1)
for _ in tqdm(range(k)):
random_8pts = np.random.choice(N, 8, replace=False)
F_8pt = f_8_pt(pts1[random_8pts], pts2[random_8pts])
error_8pt = calculate_errors(F_8pt, pts1, pts2)
d_8pt = (error_8pt <= t).sum()
if d_8pt > d_min_8pt:
inlier_8pts = pts1[error_8pt <= t]
d_min_8pt = d_8pt
F_best_8pt = F_8pt
percent_inliers_8pt.append(d_min_8pt / N)
random_7pts = np.random.choice(N, 7, replace=False)
Fs_7pt = f_7_pt(pts1[random_7pts], pts2[random_7pts])
for F_7pt in Fs_7pt:
error_7pt = calculate_errors(F_7pt, pts1, pts2)
d_7pt = (error_7pt <= t).sum()
if d_7pt > d_min_7pt:
inlier_7pts = pts1[error_7pt <= t]
d_min_7pt = d_7pt
F_best_7pt = F_7pt
percent_inliers_7pt.append(d_min_7pt / N)
if not show_best_f:
plot_epipolar_lines(F_best_8pt, inlier_8pts, img1, img2)
print(F_best_8pt)
plot_epipolar_lines(F_best_7pt, inlier_7pts, img1, img2)
print(F_best_7pt)
else:
if inlier_8pts.shape[0] > inlier_7pts.shape[0]:
plot_epipolar_lines(F_best_8pt, inlier_8pts, img1, img2)
print(F_best_8pt)
else:
plot_epipolar_lines(F_best_7pt, inlier_7pts, img1, img2)
print(F_best_7pt)
if show_inlier_graph:
plt.plot(percent_inliers_8pt, label="8-point")
plt.plot(percent_inliers_7pt, label="7-point")
plt.xlabel("iterations")
plt.ylabel("Percent Inliers")
plt.legend(loc="lower right")
plt.show()
def q2(img_dir: str):
common_root = f"data/{img_dir}"
corresp = np.load(f"{common_root}/corresp_noisy.npz", allow_pickle=True)
pts1 = corresp["pts1"]
pts2 = corresp["pts2"]
img1 = np.array(Image.open(f"{common_root}/image1.jpg"))
img2 = np.array(Image.open(f"{common_root}/image2.jpg"))
ransac(k=10000, t=2, pts1=pts1, pts2=pts2, img1=img1, img2=img2)
img_dirs = ["q1a/bench", "q1a/remote", "q1b/ball", "q1b/hydrant"]
for img_dir in img_dirs:
q2(img_dir)
100%|██████████| 10000/10000 [00:08<00:00, 1147.95it/s]
[[-0. -0. -0.0001] [-0. 0. 0.0236] [-0.0008 -0.0221 1. ]]
[[-0. -0. 0.0001] [-0. 0. 0.0244] [-0.001 -0.0227 1. ]]
0%| | 0/10000 [00:00<?, ?it/s]/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:8: RuntimeWarning: divide by zero encountered in divide d1 = np.abs((l1 * pts2).sum(axis=1)) / np.linalg.norm(l1[:, :2], axis=1) 5%|▌ | 515/10000 [00:00<00:03, 2655.20it/s]/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:9: RuntimeWarning: divide by zero encountered in divide d2 = np.abs((l2 * pts1).sum(axis=1)) / np.linalg.norm(l2[:, :2], axis=1) /var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:9: RuntimeWarning: invalid value encountered in divide d2 = np.abs((l2 * pts1).sum(axis=1)) / np.linalg.norm(l2[:, :2], axis=1) 30%|██▉ | 2980/10000 [00:01<00:02, 3087.51it/s]/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:8: RuntimeWarning: invalid value encountered in divide d1 = np.abs((l1 * pts2).sum(axis=1)) / np.linalg.norm(l1[:, :2], axis=1) 100%|██████████| 10000/10000 [00:03<00:00, 3125.39it/s]
[[-0. 0.0001 -0.0045] [-0.0001 -0. -0.0333] [ 0.0023 0.0569 1. ]]
[[ 0. -0. 0.0035] [ 0. -0. -0.0102] [-0.006 0.0121 1. ]]
100%|██████████| 10000/10000 [00:07<00:00, 1280.14it/s]
[[-0. -0. -0.0057] [ 0. -0. -0.0148] [ 0.008 0.0149 1. ]]
[[-0. -0. -0.0065] [ 0. -0. -0.0146] [ 0.0088 0.0145 1. ]]
100%|██████████| 10000/10000 [00:09<00:00, 1079.96it/s]
[[-0. -0. 0.0009] [ 0. -0. -0.0226] [-0.0014 0.0233 1. ]]
[[ 0. -0. -0.0011] [ 0. -0. -0.017 ] [ 0.0008 0.0168 1. ]]
In my implementation of RANSAC, I sample 8 or 7 random point correspondences, then calculate the distances of the points to the epipolar lines. If there are multiple real roots for the 7 point algorithm, I loop through all of them.
Q3: Triangulation¶
def triangulate(img1: np.ndarray,
img2: np.ndarray,
P1: np.ndarray,
P2: np.ndarray,
pts1: np.ndarray,
pts2: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
pts = []
colors = []
above_main_diag_indices = np.triu_indices(3, 1)
for pt1, pt2 in zip(pts1, pts2):
A = np.zeros((4, 4))
pt1_cross_entries = np.append(pt1.copy(), 1)[::-1]
pt1_cross_entries[::2] *= -1
pt1_cross_matrix = np.zeros((3, 3))
pt1_cross_matrix[above_main_diag_indices] = pt1_cross_entries
pt1_cross_matrix -= pt1_cross_matrix.T
A[:2] = (pt1_cross_matrix @ P1)[:2]
pt2_cross_entries = np.append(pt2.copy(), 1)[::-1]
pt2_cross_entries[::2] *= -1
pt2_cross_matrix = np.zeros((3, 3))
pt2_cross_matrix[above_main_diag_indices] = pt2_cross_entries
pt2_cross_matrix -= pt2_cross_matrix.T
A[2:] = (pt2_cross_matrix @ P2)[:2]
_, _, Vt = np.linalg.svd(A)
X = Vt[-1]
X /= X[-1]
pts.append(X[:3])
img1_color = img1[tuple(pt1[::-1])].astype(float)
img2_color = img2[tuple(pt2[::-1])].astype(float)
colors.append((img1_color + img2_color) / 2)
pts = np.stack(pts)
colors = np.stack(colors)
return pts, colors
def q3():
img1 = np.array(Image.open("data/q3/img1.jpg"))
img2 = np.array(Image.open("data/q3/img2.jpg"))
P1 = np.load("data/q3/P1.npy")
P2 = np.load("data/q3/P2.npy")
pts1 = np.load("data/q3/pts1.npy")
pts2 = np.load("data/q3/pts2.npy")
pts, colors = triangulate(img1, img2, P1, P2, pts1, pts2)
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2], c=colors / 255., s=0.2)
ax.view_init(azim=-100, elev=-60)
plt.show()
q3()
To triangulate, I created the constraints using the relations $x \times PX = 0$ and $x' \times P'X = 0$ for each pair of corresponding points $x, x'$. I average the colors of the corresponding pixels and just plot the point cloud from there.
Q4: Reconstruct your own scene!¶
def q4(img_dir: str):
images = list(os.scandir(f"data/q4/{img_dir}"))[:6]
num_columns = int(np.ceil(len(images) / 2))
_, axs = plt.subplots(2, num_columns)
axs = axs.flatten()
for img, ax in zip(images, axs):
img = np.array(Image.open(img.path))
ax.set_axis_off()
ax.imshow(img)
if len(images) % 2:
axs[-1].remove()
plt.show()
image_dirs = ["bag", "mug2"]
for image_dir in image_dirs:
q4(image_dir)
Q5: Bonus 1 - Fundamental matrix estimation on your own images.¶
def q5(img_dir: str):
common_root = f"data/q4/{img_dir}"
img1 = cv2.imread(f"{common_root}/image_1.jpg")
gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)
sift1 = cv2.xfeatures2d.SIFT_create()
kp1, des1 = sift1.detectAndCompute(gray1, None)
img2 = cv2.imread(f"{common_root}/image_2.jpg")
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
sift2 = cv2.xfeatures2d.SIFT_create()
kp2, des2 = sift2.detectAndCompute(gray2, None)
bf = cv2.BFMatcher()
matches = bf.knnMatch(des1, des2, k=2)
good = []
for m, n in matches:
if m.distance < 0.75 * n.distance:
good.append(m)
pts1 = []
pts2 = []
for m in good:
pts1.append(kp1[m.queryIdx].pt)
pts2.append(kp2[m.trainIdx].pt)
pts1 = np.array(pts1)
pts2 = np.array(pts2)
ransac(k=10000, t=1.4, pts1=pts1, pts2=pts2, img1=img1, img2=img2, show_inlier_graph=False, show_best_f=True)
img_dirs = ["barstool", "mug"]
for img_dir in img_dirs:
q5(img_dir)
66%|██████▌ | 6580/10000 [00:03<00:01, 1793.90it/s]/var/folders/b0/l1qz1mms5hb1swdf228jq8680000gn/T/ipykernel_66338/4101735544.py:9: RuntimeWarning: divide by zero encountered in divide d2 = np.abs((l2 * pts1).sum(axis=1)) / np.linalg.norm(l2[:, :2], axis=1) 100%|██████████| 10000/10000 [00:05<00:00, 1757.60it/s]
[[ 0. -0. -0.0005] [-0. -0. 0.0013] [-0. -0.0007 1. ]]
100%|██████████| 10000/10000 [00:04<00:00, 2206.93it/s]
[[ 0. 0. -0.001 ] [ 0. -0. -0.0023] [ 0.0008 0.0019 1. ]]
I used the SIFT features from the grayscale images to compute keypoints and descriptors. Afterwards, I used a brute force matcher and specifically kNN to match the descriptors together. After filtering to get the good matches, I run RANSAC and just plotted whichever algorithm gave more inliers and output the fundamental matrix $F$.