Tau-J · AurelienCoppee · Feb 12, 2025 · Feb 13, 2025
diff --git a/rtmlib/tools/object_detection/rtmdet.py b/rtmlib/tools/object_detection/rtmdet.py
@@ -38,26 +38,29 @@ def preprocess(self, img: np.ndarray):
 
         Returns:
             tuple:
-            - resized_img (np.ndarray): Preprocessed image.
-            - center (np.ndarray): Center of image.
-            - scale (np.ndarray): Scale of image.
+            - padded_img (np.ndarray): Preprocessed image.
+            - ratio (float): Resize factor; 1.0 if img already has input size.
         """
-        if len(img.shape) == 3:
-            padded_img = np.ones(
-                (self.model_input_size[0], self.model_input_size[1], 3),
-                dtype=np.uint8) * 114
+        if img.shape[:2] == tuple(self.model_input_size[:2]):
+            padded_img = img.copy()
+            ratio = 1.
         else:
-            padded_img = np.ones(self.model_input_size, dtype=np.uint8) * 114
-
-        ratio = min(self.model_input_size[0] / img.shape[0],
-                    self.model_input_size[1] / img.shape[1])
-        resized_img = cv2.resize(
-            img,
-            (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
-            interpolation=cv2.INTER_LINEAR,
-        ).astype(np.uint8)
-        padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
-        padded_img[:padded_shape[0], :padded_shape[1]] = resized_img
+            if len(img.shape) == 3:
+                padded_img = np.ones(
+                    (self.model_input_size[0], self.model_input_size[1], 3),
+                    dtype=np.uint8) * 114
+            else:
+                padded_img = np.ones(self.model_input_size, dtype=np.uint8) * 114
+
+            ratio = min(self.model_input_size[0] / img.shape[0],
+                        self.model_input_size[1] / img.shape[1])
+            resized_img = cv2.resize(
+                img,
+                (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
+                interpolation=cv2.INTER_LINEAR,
+            ).astype(np.uint8)
+            padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
+            padded_img[:padded_shape[0], :padded_shape[1]] = resized_img
 
         # normalize image
         if self.mean is not None:

diff --git a/rtmlib/tools/object_detection/yolox.py b/rtmlib/tools/object_detection/yolox.py
@@ -38,26 +38,29 @@ def preprocess(self, img: np.ndarray):
 
         Returns:
             tuple:
-            - resized_img (np.ndarray): Preprocessed image.
-            - center (np.ndarray): Center of image.
-            - scale (np.ndarray): Scale of image.
+            - padded_img (np.ndarray): Preprocessed image.
+            - ratio (float): Resize factor; 1.0 if img already has input size.
         """
-        if len(img.shape) == 3:
-            padded_img = np.ones(
-                (self.model_input_size[0], self.model_input_size[1], 3),
-                dtype=np.uint8) * 114
+        if img.shape[:2] == tuple(self.model_input_size[:2]):
+            padded_img = img.copy()
+            ratio = 1.
         else:
-            padded_img = np.ones(self.model_input_size, dtype=np.uint8) * 114
-
-        ratio = min(self.model_input_size[0] / img.shape[0],
-                    self.model_input_size[1] / img.shape[1])
-        resized_img = cv2.resize(
-            img,
-            (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
-            interpolation=cv2.INTER_LINEAR,
-        ).astype(np.uint8)
-        padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
-        padded_img[:padded_shape[0], :padded_shape[1]] = resized_img
+            if len(img.shape) == 3:
+                padded_img = np.ones(
+                    (self.model_input_size[0], self.model_input_size[1], 3),
+                    dtype=np.uint8) * 114
+            else:
+                padded_img = np.ones(self.model_input_size, dtype=np.uint8) * 114
+
+            ratio = min(self.model_input_size[0] / img.shape[0],
+                        self.model_input_size[1] / img.shape[1])
+            resized_img = cv2.resize(
+                img,
+                (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
+                interpolation=cv2.INTER_LINEAR,
+            ).astype(np.uint8)
+            padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
+            padded_img[:padded_shape[0], :padded_shape[1]] = resized_img
 
         return padded_img, ratio
 

diff --git a/rtmlib/tools/pose_estimation/rtmo.py b/rtmlib/tools/pose_estimation/rtmo.py
@@ -48,26 +48,29 @@ def preprocess(self, img: np.ndarray):
 
         Returns:
             tuple:
-            - resized_img (np.ndarray): Preprocessed image.
-            - center (np.ndarray): Center of image.
-            - scale (np.ndarray): Scale of image.
+            - padded_img (np.ndarray): Preprocessed image.
+            - ratio (float): Resize factor; 1.0 if img already has input size.
         """
-        if len(img.shape) == 3:
-            padded_img = np.ones(
-                (self.model_input_size[0], self.model_input_size[1], 3),
-                dtype=np.uint8) * 114
+        if img.shape[:2] == tuple(self.model_input_size[:2]):
+            padded_img = img.copy()
+            ratio = 1.
         else:
-            padded_img = np.ones(self.model_input_size, dtype=np.uint8) * 114
-
-        ratio = min(self.model_input_size[0] / img.shape[0],
-                    self.model_input_size[1] / img.shape[1])
-        resized_img = cv2.resize(
-            img,
-            (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
-            interpolation=cv2.INTER_LINEAR,
-        ).astype(np.uint8)
-        padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
-        padded_img[:padded_shape[0], :padded_shape[1]] = resized_img
+            if len(img.shape) == 3:
+                padded_img = np.ones(
+                    (self.model_input_size[0], self.model_input_size[1], 3),
+                    dtype=np.uint8) * 114
+            else:
+                padded_img = np.ones(self.model_input_size, dtype=np.uint8) * 114
+
+            ratio = min(self.model_input_size[0] / img.shape[0],
+                        self.model_input_size[1] / img.shape[1])
+            resized_img = cv2.resize(
+                img,
+                (int(img.shape[1] * ratio), int(img.shape[0] * ratio)),
+                interpolation=cv2.INTER_LINEAR,
+            ).astype(np.uint8)
+            padded_shape = (int(img.shape[0] * ratio), int(img.shape[1] * ratio))
+            padded_img[:padded_shape[0], :padded_shape[1]] = resized_img
 
         # normalize image
         if self.mean is not None:

diff --git a/rtmlib/tools/solution/pose_tracker.py b/rtmlib/tools/solution/pose_tracker.py
@@ -185,7 +185,7 @@ def __call__(self, image: np.ndarray):
             keypoints, scores = self.pose_model(image)
 
 
-        if not self.tracking:
+        if not self.tracking and self.det_frequency != 1:
             # without tracking
             bboxes_current_frame = []
             for kpts in keypoints: