I am writing a homography by rotation project by following the example here: https://docs.opencv.org/4.4.0/d9/dab/tutorial_homography.html#tutorial_homography_Demo5
For my project, I capture two photos using an XR environment. For each photo, I have 1.) a rotation quaternion given to me by the device, and 2.) a 4×4 camera intrinsics projection matrix of the XR scene. For my purposes I am assuming camera position(translation) does not change.
When I run my script, the stitch fails.
Can someone show me where I am going wrong? I believe this is not working due to a lack of understanding of matrices, or improper conversion of camera projection matrix to camera intrinsics.
Script:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Python 2/3 compatibility
from __future__ import print_function
import numpy as np
import cv2 as cv
def quaternion_to_rotation_matrix(q):
x, y, z, w = q['x'], q['y'], q['z'], q['w']
return np.array([
[1 - 2*(y**2 + z**2), 2*(x*y - z*w), 2*(x*z + y*w)],
[2*(x*y + z*w), 1 - 2*(x**2 + z**2), 2*(y*z - x*w)],
[2*(x*z - y*w), 2*(y*z + x*w), 1 - 2*(x**2 + y**2)]
])
def basicPanoramaStitching(img1Path, img2Path):
img1 = cv.imread(cv.samples.findFile(img1Path))
img2 = cv.imread(cv.samples.findFile(img2Path))
if img1 is None or img2 is None:
print("Error loading images.")
return
# Rotation quaternions from deviceOrientation event
q1 = {'w': -0.7968594431877136, 'x': 0.0034535229206085205, 'y': 0.6041417717933655, 'z': -0.004005712922662497} # Quaternion for camera 1
q2 = {'w': -0.6669896245002747, 'x': 0.0010529130231589079, 'y': -0.7450388669967651, 'z': 0.00638939393684268} # Quaternion for camera 2
# Position data of camera
pos1 = {'x': 0, 'y': 2, 'z': 0}
pos2 = {'x': 0, 'y': 2, 'z': 0}
# Convert quaternion to rotation matrix
R1 = quaternion_to_rotation_matrix(q1)
R2 = quaternion_to_rotation_matrix(q2)
# Print rotation matrices
print("R1:n", R1)
print("R2:n", R2)
# Construct transformation matrices
c1Mo = np.eye(4)
c2Mo = np.eye(4)
c1Mo[0:3, 0:3] = R1
c2Mo[0:3, 0:3] = R2
c1Mo[0:3, 3] = [pos1['x'], pos1['y'], pos1['z']]
c2Mo[0:3, 3] = [pos2['x'], pos2['y'], pos2['z']]
# Raw intrinsics from the device (16-element column-major array).
# This is a 16 dimensional column-major 4x4 projection matrix that gives
# the scene camera the same field of view as the rendered camera feed.
raw_intrinsics = [2.5618553161621094, 0, 0, 0,
0, 1.4930813312530518, 0, 0,
0, 0, -1.0000009536743164, -1,
0, 0, -0.010000004433095455, 0]
# Convert to a 4x4 row-major matrix
intrinsics_4x4 = np.array(raw_intrinsics).reshape((4, 4)).T
# Extract the 3x3 camera intrinsic matrix directly from the 4x4 matrix
cameraMatrix = intrinsics_4x4[:3, :3]
# Since this is a projection matrix, adjust cx and cy based on the third column if necessary
fx, fy = cameraMatrix[0, 0], cameraMatrix[1, 1]
cx, cy = cameraMatrix[0, 2], cameraMatrix[1, 2]
# Adjusting cameraMatrix to a proper intrinsic format if needed
cameraMatrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
print("Camera Matrix:n", cameraMatrix)
# Compute rotation displacement
R2_transpose = R2.transpose()
R_2to1 = np.dot(R1, R2_transpose)
# Print the difference in rotation matrices
print("R_2to1 (Difference in rotation matrices):n", R_2to1)
# Compute homography
H = cameraMatrix.dot(R_2to1).dot(np.linalg.inv(cameraMatrix))
H = H / H[2, 2]
print("Homography:n", H)
# Apply the homography to the second image to visualize the transformation
transformed_img2 = cv.warpPerspective(img2, H, (img2.shape[1] * 2, img2.shape[0]))
# Visualize the transformed second image
cv.imshow("Transformed Image 2", transformed_img2)
# Stitch images
img_stitch = cv.warpPerspective(img2, H, (img2.shape[1] * 2, img2.shape[0]))
img_stitch[0:img1.shape[0], 0:img1.shape[1]] = img1
img_space = np.zeros((img1.shape[0], 50, 3), dtype=np.uint8)
img_compare = cv.hconcat([img1, img_space, img2])
cv.imshow("Final", img_compare)
cv.imshow("Panorama", img_stitch)
cv.waitKey(0)
def main():
import argparse
parser = argparse.ArgumentParser(description="Code for homography tutorial. Example 5: basic panorama stitching from a rotating camera.")
parser.add_argument("-I1", "--image1", help="path to first image", default="my-capture-135347.jpg")
parser.add_argument("-I2", "--image2", help="path to second image", default="my-capture-135408.jpg")
args = parser.parse_args()
print("Panorama Stitching Started")
basicPanoramaStitching(args.image1, args.image2)
print("Panorama Stitching Completed Successfully")
if __name__ == '__main__':
main()
Debugging outputs:
R1:
[[ 2.69993348e-01 -2.21114543e-03 -9.62859819e-01]
[ 1.05568153e-02 9.99944055e-01 6.63907698e-04]
[ 9.62804484e-01 -1.03439817e-02 2.70001586e-01]]
R2:
[[-0.11024748 0.0069544 0.99387984]
[-0.01009224 0.99991613 -0.00811613]
[-0.99385293 -0.01092526 -0.11016804]]
Camera Matrix:
[[2.5618553 0. 0. ]
[0. 1.4930813 0. ]
[0. 0. 1. ]]
R_2to1 (Difference in rotation matrices):
[[-0.98674843 0.0028789 -0.16223314]
[ 0.00644999 0.99974826 -0.02148971]
[ 0.16213043 -0.02225134 -0.9865186 ]]
Homography:
[[ 1.000233 -0.00500717 0.42129751]
[-0.00381051 -1.01341045 0.03252436]
[-0.06415118 0.01510662 1. ]]
Input Photos:
my-capture-135347.jpg:
my-capture-135408.jpg: