I’m encountering an issue with the ImageRegistrator in Kornia while trying to register two frames using translation. The function consistently returns incorrect translation values, even for a basic synthetic test case. Below is the code I used to test the translation registration. Could someone please guide me on the correct approach?
Reproduction steps
import numpy as np
import torch
import kornia as K
import kornia.geometry as KG
import cv2
import matplotlib.pyplot as plt
from typing import Tuple
def find_frame_translation_kornia(frame_1: np.ndarray, frame_2: np.ndarray) -> Tuple[Tuple[float, float], np.ndarray, bool]:
"""Find translation between two frames using Kornia's ImageRegistrator."""
frame_1 = K.image_to_tensor(frame_1, False).float() / 255.0 # Shape: (C, H, W)
frame_2 = K.image_to_tensor(frame_2, False).float() / 255.0 # Shape: (C, H, W)
if frame_1.ndim == 3:
frame_1 = frame_1.unsqueeze(0) # Shape: (1, C, H, W)
if frame_2.ndim == 3:
frame_2 = frame_2.unsqueeze(0) # Shape: (1, C, H, W)
registrator = KG.ImageRegistrator("translation") # same results for "similarity", "homography", etc.
try:
model = registrator.register(frame_1, frame_2)
tx, ty = model[0, :2, 2].cpu().detach().numpy()
shift = (tx, ty)
M = np.array([[1, 0, tx], [0, 1, ty]], dtype=np.float32)
success = True
except Exception as e:
print(f"Registration failed: {e}")
shift, M, success = (0, 0), np.eye(3, dtype=np.float32)[:2, :], False
return shift, M, success
def create_translated_image(image: np.ndarray, tx: int, ty: int) -> np.ndarray:
rows, cols = image.shape
M = np.float32([[1, 0, tx], [0, 1, ty]])
translated_image = cv2.warpAffine(image, M, (cols, rows))
return translated_image
def test_find_frame_translation_kornia():
image = np.zeros((100, 100), dtype=np.uint8)
image[30:70, 30:70] = 255 # A white square in the center
tx, ty = 5, -3
translated_image = create_translated_image(image, tx, ty)
plt.subplot(1, 2, 1)
plt.imshow(image, cmap='gray')
plt.title("Original Image")
plt.subplot(1, 2, 2)
plt.imshow(translated_image, cmap='gray')
plt.title("Translated Image")
plt.show()
shift, M, success = find_frame_translation_kornia(image, translated_image)
assert success, "Registration failed"
np.testing.assert_almost_equal(shift, (tx, ty), decimal=1)
expected_M = np.array([[1, 0, tx], [0, 1, ty]], dtype=np.float32)
np.testing.assert_almost_equal(M, expected_M, decimal=1)
if __name__ == "__main__":
test_find_frame_translation_kornia()
Issue:
When running the test, the function returns significantly incorrect translation values. Specifically, for a known translation of (5, -3), the function returns (-0.1, 0.1). Here is the error message I received:
AssertionError:
Arrays are not almost equal to 1 decimals
Mismatched elements: 2 / 2 (100%)
Max absolute difference: 5.09875337
Max relative difference: 1.02040539
x: array([-0.1, 0.1], dtype=float32)
y: array([ 5, -3])
Expected behavior
The function should correctly identify the translation (5, -3) between the original and translated images.
Environment
- PyTorch Version (e.g., 1.0): 2.4.0+cpu
- OS (e.g., Linux): Microsoft Windows 10 Pro
- How you installed PyTorch (
conda
,pip
, source):
[pip3] torch-pitch-shift==1.2.4
[pip3] torchaudio==2.3.0
[pip3] torchmetrics==1.4.0.post0
[conda] numpy 1.26.4 pypi_0 pypi
[conda] torch 2.4.0 pypi_0 pypi - Build command you used (if compiling from source):
- Python version: 3.9.19
- CUDA/cuDNN version: False
- GPU models and configuration: False
- Any other relevant information: