Commit 1c7d6ce

OBB based cropping for centered instance
1 parent d946ec3 commit 1c7d6ce

File tree: 8 files changed, +418 -80 lines

sleap_nn/data/augmentation.py

Lines changed: 1 addition & 1 deletion
@@ -154,7 +154,7 @@ def apply_geometric_augmentation(
     if affine_p > 0:
         aug_stack.append(
             K.augmentation.RandomAffine(
-                degrees=(rotation_min, rotation_max),
+                degrees=(rotation_min, rotation_min),
                 translate=(translate_width, translate_height),
                 scale=(scale_min, scale_max),
                 p=affine_p,
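
Kornia samples the affine rotation angle uniformly from the (min, max) tuple, so the degenerate (rotation_min, rotation_min) range above pins the rotation to a single value rather than a random one. A minimal standalone sketch of that behavior (the rotation_min value here is hypothetical, not from this config):

import torch
import kornia.augmentation as K

rotation_min = -15.0  # hypothetical value, for illustration only
aug = K.RandomAffine(degrees=(rotation_min, rotation_min), p=1.0)
img = torch.rand(1, 1, 64, 64)
out = aug(img)  # every sample is rotated by exactly rotation_min degrees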

sleap_nn/data/custom_datasets.py

Lines changed: 60 additions & 30 deletions
@@ -20,7 +20,9 @@
 from sleap_nn.config.utils import get_backbone_type_from_cfg, get_model_type_from_cfg
 from sleap_nn.data.identity import generate_class_maps, make_class_vectors
 from sleap_nn.data.instance_centroids import generate_centroids
-from sleap_nn.data.instance_cropping import generate_crops
+from sleap_nn.data.instance_cropping import generate_crops, find_instance_crop_size
+
+
 from sleap_nn.data.normalization import (
     apply_normalization,
     convert_to_grayscale,
@@ -34,7 +36,7 @@
 )
 from sleap_nn.data.confidence_maps import generate_confmaps, generate_multiconfmaps
 from sleap_nn.data.edge_maps import generate_pafs
-from sleap_nn.data.instance_cropping import make_centered_bboxes
+from sleap_nn.data.instance_cropping import make_centered_bboxes, get_cropped_img
 from sleap_nn.training.utils import is_distributed_initialized
 from sleap_nn.config.get_config import get_aug_config
 
@@ -738,6 +740,7 @@ def __init__(
         self.confmap_head_config = confmap_head_config
         self.instance_idx_list = self._get_instance_idx_list(labels)
         self.cache_lf = [None, None]
+        # self.max_crop_size = find_instance_crop_size(self.labels, maximum_stride=self.max_stride)
 
     def _get_instance_idx_list(self, labels: List[sio.Labels]) -> List[Tuple[int]]:
         """Return list of tuples with indices of labelled frames and instances."""
@@ -840,24 +843,41 @@ def __getitem__(self, index) -> Dict:
             scale=self.scale,
         )
 
-        # get the centroids based on the anchor idx
-        centroids = generate_centroids(instances, anchor_ind=self.anchor_ind)
+        instance = instances[0]
 
-        instance, centroid = instances[0], centroids[0]  # (n_samples=1)
+        sample = {}
 
-        crop_size = np.array([self.crop_size, self.crop_size]) * np.sqrt(
-            2
-        )  # crop extra for rotation augmentation
-        crop_size = crop_size.astype(np.int32).tolist()
+        # Get the head index
+        head_idx = self.anchor_ind
 
-        sample = generate_crops(image, instance, centroid, crop_size)
+        # Determine if the instance has enough valid points
+        valid_points = instance[~torch.isnan(instance).any(dim=1)]
+        if valid_points.shape[0] < 3:
+            return self.__getitem__((index + 1) % len(self))  # safely retry next sample
+
+        # crop image
+        sample_image, sample_instance, src_pts, dst_pts, rotated = get_cropped_img(
+            image[0], instance, head_idx
+        )
+        sample_image, sample_instance = sample_image.unsqueeze(
+            0
+        ), sample_instance.unsqueeze(0)
+
+        sample["instance_image"] = sample_image
+        sample["instance"] = sample_instance
+        sample["src_pts"] = src_pts.unsqueeze(0)
+        sample["dst_pts"] = dst_pts.unsqueeze(0)
+        sample["rotated"] = torch.tensor([rotated], dtype=torch.bool)
 
         sample["frame_idx"] = torch.tensor(lf_frame_idx, dtype=torch.int32)
         sample["video_idx"] = torch.tensor(video_idx, dtype=torch.int32)
         sample["num_instances"] = num_instances
         sample["orig_size"] = torch.Tensor([orig_img_height, orig_img_width]).unsqueeze(
             0
         )
+        height, width = sample_image.shape[-2:]
+        sample["height"] = [height]
+        sample["width"] = [width]
         sample["eff_scale"] = torch.tensor(eff_scale, dtype=torch.float32)
 
         # apply augmentation
@@ -883,27 +903,32 @@ def __getitem__(self, index) -> Dict:
             )
 
         # re-crop to original crop size
-        sample["instance_bbox"] = torch.unsqueeze(
-            make_centered_bboxes(sample["centroid"][0], self.crop_size, self.crop_size),
-            0,
-        )  # (n_samples=1, 4, 2)
-
-        sample["instance_image"] = crop_and_resize(
+        # sample["instance_bbox"] = torch.unsqueeze(
+        #     make_centered_bboxes(sample["centroid"][0], self.crop_size, self.crop_size),
+        #     0,
+        # )  # (n_samples=1, 4, 2)
+
+        # sample["instance_image"] = crop_and_resize(
+        #     sample["instance_image"],
+        #     boxes=sample["instance_bbox"],
+        #     size=(self.crop_size, self.crop_size),
+        # )
+        # size matcher
+        sample_image, eff_scale = apply_sizematcher(
             sample["instance_image"],
-            boxes=sample["instance_bbox"],
-            size=(self.crop_size, self.crop_size),
+            max_height=self.crop_size,
+            max_width=self.crop_size,
         )
-        point = sample["instance_bbox"][0][0]
-        center_instance = sample["instance"] - point
-        centered_centroid = sample["centroid"] - point
-
-        sample["instance"] = center_instance  # (n_samples=1, n_nodes, 2)
-        sample["centroid"] = centered_centroid  # (n_samples=1, 2)
+        # point = sample["instance_bbox"][0][0]
+        # center_instance = sample["instance"] - point
+        # centered_centroid = sample["centroid"] - point
 
-        # Pad the image (if needed) according max stride
-        sample["instance_image"] = apply_pad_to_stride(
-            sample["instance_image"], max_stride=self.max_stride
-        )
+        # sample["instance"] = center_instance  # (n_samples=1, n_nodes, 2)
+        # sample["centroid"] = centered_centroid  # (n_samples=1, 2)
+        sample_instance = sample["instance"] * eff_scale
+        sample["instance"] = sample_instance
+        sample["instance_image"] = sample_image
+        sample["scale"] = torch.tensor(eff_scale, dtype=torch.float32).unsqueeze(dim=0)
 
         img_hw = sample["instance_image"].shape[-2:]
 
@@ -1831,7 +1856,12 @@ def get_train_val_datasets(
         ),
         scale=config.data_config.preprocessing.scale,
         apply_aug=config.data_config.use_augmentations_train,
-        crop_size=config.data_config.preprocessing.crop_size,
+        crop_size=find_instance_crop_size(
+            train_labels,
+            maximum_stride=config.model_config.backbone_config[f"{backbone_type}"][
+                "max_stride"
+            ],
+        ),
         max_hw=(
             config.data_config.preprocessing.max_height,
             config.data_config.preprocessing.max_width,
@@ -1855,7 +1885,7 @@
         geometric_aug=None,
         scale=config.data_config.preprocessing.scale,
         apply_aug=False,
-        crop_size=config.data_config.preprocessing.crop_size,
+        crop_size=train_dataset.crop_size,
         max_hw=(
             config.data_config.preprocessing.max_height,
             config.data_config.preprocessing.max_width,
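
The dataset now carries the forward transform (src_pts, dst_pts, rotated) alongside each crop, which is what keeps the OBB warp invertible downstream. A standalone sketch of the new per-sample flow on a synthetic frame and instance (shapes follow the diff above; illustrative, not a verbatim excerpt from the repo):

import torch
from sleap_nn.data.instance_cropping import get_cropped_img

image = torch.rand(1, 1, 384, 384)  # (n_samples=1, C, H, W)
instance = torch.tensor(
    [[150.0, 200.0], [180.0, 190.0], [220.0, 205.0], [float("nan"), float("nan")]]
)  # (n_nodes, 2), with one missing node

# Same guard as in __getitem__: the convex hull needs at least 3 valid points.
valid_points = instance[~torch.isnan(instance).any(dim=1)]
assert valid_points.shape[0] >= 3

crop, kpts, src_pts, dst_pts, rotated = get_cropped_img(image[0], instance, head_idx=0)
sample = {
    "instance_image": crop.unsqueeze(0),
    "instance": kpts.unsqueeze(0),
    "src_pts": src_pts.unsqueeze(0),
    "dst_pts": dst_pts.unsqueeze(0),
    "rotated": torch.tensor([rotated], dtype=torch.bool),
}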

sleap_nn/data/instance_cropping.py

Lines changed: 135 additions & 0 deletions
@@ -6,6 +6,141 @@
 import sleap_io as sio
 import torch
 from kornia.geometry.transform import crop_and_resize
+from sleap_nn.data.utils import rotating_calipers
+import kornia
+
+
+def get_cropped_img(image: torch.Tensor, instance: torch.Tensor, head_idx: int):
+    """Crop and rotate an image using the oriented bounding box (OBB) of a given instance.
+
+    This function performs a padding-aware crop around the instance keypoints using the
+    minimum-area rotating-calipers OBB. It then aligns the longest edge with the x-axis,
+    warps the image and keypoints accordingly, and applies a conditional 180° rotation if
+    the head is facing left. The output is a torch-native equivalent of OpenCV's
+    getAffineTransform + warpAffine behavior.
+
+    Args:
+        image (torch.Tensor): A float tensor of shape (C, H, W), representing an RGB image.
+        instance (torch.Tensor): A float tensor of shape (N, 2), representing keypoint
+            coordinates of one instance.
+        head_idx (int): Index of the head keypoint, used to determine leftward orientation.
+
+    Returns:
+        cropped_image (torch.Tensor): Cropped and rotated image of shape (C, H, W), aligned to face +x.
+        adjusted_kpts (torch.Tensor): Keypoints of shape (N, 2), transformed to match the cropped image coordinates.
+        src_pts (torch.Tensor): Three source points from the padded OBB used for the affine transformation (3, 2).
+        dst_pts (torch.Tensor): Three target points in the crop destination space used for affine warping (3, 2).
+        rotated (bool): True if the instance was rotated 180° to face the positive x-axis, otherwise False.
+    """
+    # Define padding
+    pad = 32
+
+    # Ensure dtype and device consistency
+    image = image.float()
+    device = image.device
+    instance = instance.to(device)
+
+    # Get OBB from keypoints
+    obb_coords = rotating_calipers(instance)
+
+    # Find longest edge and roll OBB
+    dists = torch.norm(obb_coords - torch.roll(obb_coords, shifts=-1, dims=0), dim=1)
+    max_index = torch.argmax(dists)
+    obb_coords = torch.roll(obb_coords, shifts=max_index.item(), dims=0)
+
+    # Compute padded OBB by expanding each corner outward from center
+    center = obb_coords.mean(dim=0, keepdims=True)
+    vecs = obb_coords - center
+    norms = torch.norm(vecs, dim=1).unsqueeze(1)  # shape: (4, 1)
+    norms = torch.where(
+        norms == 0, torch.ones_like(norms), norms
+    )  # avoid division by zero
+
+    # Find the OBB edge closest to the x-axis (smallest absolute angle)
+    best_idx = 0
+    min_abs_angle = float("inf")
+    for i in range(4):
+        edge = obb_coords[(i + 1) % 4] - obb_coords[i]
+        angle = torch.atan2(edge[1], edge[0])
+        if abs(angle) < min_abs_angle:
+            min_abs_angle = abs(angle)
+            best_idx = i
+
+    # Roll so this edge is [0] -> [1]
+    obb_coords = torch.roll(obb_coords, shifts=-best_idx, dims=0)
+    edge = obb_coords[1] - obb_coords[0]
+    angle = torch.atan2(edge[1], edge[0])
+
+    # If the edge points left, reverse the corner order
+    # (torch.flip, since torch tensors do not support negative-step slicing)
+    if edge[0] < 0:
+        obb_coords = torch.flip(obb_coords, dims=[0])
+        edge = obb_coords[1] - obb_coords[0]
+        angle = torch.atan2(edge[1], edge[0])
+
+    # Define the width/height based on the OBB coordinates
+    width = torch.norm(obb_coords[1] - obb_coords[0])
+    height = torch.norm(obb_coords[3] - obb_coords[0])
+
+    # If the crop is taller than wide, rotate OBB by 90 deg to make it horizontal
+    if height > width:
+        obb_coords = torch.roll(obb_coords, shifts=-1, dims=0)  # rotate OBB 90 degrees
+        edge = obb_coords[1] - obb_coords[0]
+        angle = torch.atan2(edge[1], edge[0])
+        if edge[0] < 0:
+            obb_coords = torch.flip(obb_coords, dims=[0])
+            edge = obb_coords[1] - obb_coords[0]
+            angle = torch.atan2(edge[1], edge[0])
+        width = torch.norm(obb_coords[1] - obb_coords[0])
+        height = torch.norm(obb_coords[3] - obb_coords[0])
+
+    # Add padding to the final crop dimensions
+    width += pad * 2
+    height += pad * 2
+
+    # Build affine from OBB -> crop box
+    src_pts = (
+        obb_coords[:3].clone().to(dtype=torch.float32, device=device)
+    )  # using corners of OBB
+
+    # Rectangular region we want to map the OBB onto
+    dst_pts = torch.tensor(
+        [[pad, pad], [width - pad, pad], [width - pad, height - pad]],
+        dtype=torch.float32,
+        device=device,
+    )
+
+    ones = torch.ones((3, 1), device=device)
+    src = torch.cat(
+        [src_pts, ones], dim=1
+    )  # append 1s to the source points to compute the affine transformation
+
+    # Solve the least-squares system giving the affine that best maps src_pts -> dst_pts
+    affine_matrix = torch.linalg.lstsq(src, dst_pts).solution.T
+
+    # Warp the image with the affine transform
+    cropped_image = kornia.geometry.transform.warp_affine(
+        image.unsqueeze(0), affine_matrix.unsqueeze(0), dsize=(int(height), int(width))
+    )[0]
+
+    # Warp the keypoints with the same affine
+    kp_homo = torch.cat(
+        [instance.to(device), torch.ones((instance.shape[0], 1), device=device)], dim=1
+    )
+    adjusted_kpts = (affine_matrix @ kp_homo.T).T
+
+    # Head x-coordinate and mean body x-coordinate
+    head_x = adjusted_kpts[head_idx, 0]
+    body_center_x = adjusted_kpts[:, 0][~torch.isnan(adjusted_kpts[:, 0])].mean()
+
+    # Rotate 180° if facing left (by comparing the head keypoint to the body center)
+    rotated = False
+    if head_x < body_center_x:
+        rotated = True
+        # Rotate image 180° and flip keypoints to match
+        cropped_image = torch.rot90(cropped_image, k=2, dims=[1, 2])
+
+        adjusted_kpts[:, 0] = cropped_image.shape[2] - adjusted_kpts[:, 0]
+        adjusted_kpts[:, 1] = cropped_image.shape[1] - adjusted_kpts[:, 1]
+
+    return cropped_image, adjusted_kpts, src_pts, dst_pts, rotated
 
 
 def find_instance_crop_size(
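
A quick sanity check of the new helper: crop a synthetic instance and confirm that src_pts land on dst_pts under the affine recovered by the same least-squares solve the function uses internally. This is an illustrative sketch, not a test from the repo:

import torch
from sleap_nn.data.instance_cropping import get_cropped_img

image = torch.rand(3, 256, 256)  # (C, H, W)
instance = torch.tensor(
    [[60.0, 100.0], [120.0, 130.0], [180.0, 105.0], [130.0, 80.0]]
)  # (N, 2) keypoints

crop, kpts, src_pts, dst_pts, rotated = get_cropped_img(image, instance, head_idx=0)

# Re-solve [src | 1] @ A.T = dst: with three point pairs the system is exact,
# so the OBB corners must map onto their crop-space targets.
src_h = torch.cat([src_pts, torch.ones(3, 1)], dim=1)
A = torch.linalg.lstsq(src_h, dst_pts).solution.T  # (2, 3) affine matrix
assert torch.allclose(src_h @ A.T, dst_pts, atol=1e-3)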

sleap_nn/data/utils.py

Lines changed: 57 additions & 0 deletions
@@ -8,6 +8,7 @@
 import psutil
 import numpy as np
 from sleap_nn.data.providers import get_max_instances
+from scipy.spatial import ConvexHull
 
 
 def ensure_list(x: Any) -> List[Any]:
@@ -147,3 +148,59 @@ def check_cache_memory(
     if total_cache_memory > available_memory:
         return False
     return True
+
+
+def rotating_calipers(points: torch.Tensor):
+    """Compute the minimum-area oriented bounding box of a set of points via rotating calipers.
+
+    Args:
+        points (torch.Tensor): (N, 2) tensor of 2D coordinates.
+
+    Returns:
+        torch.Tensor: (4, 2) tensor of the minimum-area bounding box corners.
+    """
+    # Remove NaN values (the caller ensures there are enough valid points)
+    valid_points = points[~torch.isnan(points).any(dim=1)]
+
+    # Determine the convex hull using scipy's ConvexHull
+    hull = ConvexHull(valid_points)
+    hull_points = valid_points[hull.vertices]
+
+    min_area = float("inf")  # initialize minimum area to infinity
+    best_box = None  # to store the best bounding box found
+
+    # Iterate through each edge of the convex hull
+    for i in range(len(hull_points)):
+        p1 = hull_points[i]
+        p2 = hull_points[(i + 1) % len(hull_points)]
+
+        # Compute the angle of the edge
+        edge = p2 - p1
+        angle = -torch.atan2(edge[1], edge[0])
+
+        # Build rotation matrix
+        cos_a = torch.cos(angle)
+        sin_a = torch.sin(angle)
+        R = torch.stack(
+            [torch.stack([cos_a, -sin_a]), torch.stack([sin_a, cos_a])]
+        )  # shape: (2, 2)
+
+        # Rotate points
+        rotated = (hull_points - p1) @ R.T
+
+        # Compute the bounding box of the rotated points
+        xmin = torch.min(rotated[:, 0])
+        xmax = torch.max(rotated[:, 0])
+        ymin = torch.min(rotated[:, 1])
+        ymax = torch.max(rotated[:, 1])
+        area = (xmax - xmin) * (ymax - ymin)
+
+        # Update the best bounding box if the area is smaller
+        if area < min_area:
+            min_area = area
+            # rectangle corners in rotated coordinates
+            box = torch.tensor([[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]])
+            # rotate back to original coordinates
+            best_box = (box @ R) + p1
+
+    return best_box
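
As a small illustrative check (not from the repo), the minimum-area box returned by rotating_calipers can never be larger than the axis-aligned bounding box of the same points:

import torch
from sleap_nn.data.utils import rotating_calipers

pts = torch.tensor([[0.0, 0.0], [1.0, 1.2], [2.0, 1.8], [3.0, 3.0], [1.5, 1.0]])
box = rotating_calipers(pts)  # (4, 2) corners of the minimum-area OBB

# Adjacent rectangle edges give the side lengths; their product is the area.
edges = box - torch.roll(box, shifts=1, dims=0)
obb_area = torch.norm(edges[0]) * torch.norm(edges[1])
aabb_area = (pts[:, 0].max() - pts[:, 0].min()) * (pts[:, 1].max() - pts[:, 1].min())
assert obb_area <= aabb_area + 1e-5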

sleap_nn/inference/predictors.py

Lines changed: 3 additions & 3 deletions
@@ -1189,23 +1189,23 @@ def _make_labeled_frames_from_generator(
         for (
             video_idx,
             frame_idx,
-            bbox,
+            # bbox,
             pred_instances,
             pred_values,
             instance_score,
             org_size,
         ) in zip(
             ex["video_idx"],
             ex["frame_idx"],
-            ex["instance_bbox"],
+            # ex["instance_bbox"],
             ex["pred_instance_peaks"],
             ex["pred_peak_values"],
             ex["centroid_val"],
             ex["orig_size"],
         ):
             if np.isnan(pred_instances).all():
                 continue
-            pred_instances = pred_instances + bbox.squeeze(axis=0)[0, :]
+            # pred_instances = pred_instances + bbox.squeeze(axis=0)[0, :]
             preds[(int(video_idx), int(frame_idx))].append(
                 sio.PredictedInstance.from_numpy(
                     points_data=pred_instances,
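
With the axis-aligned bbox offset gone, crop-space peaks presumably have to be mapped back to frame coordinates by inverting the OBB affine (undoing the 180° flip first whenever rotated is set), using the src_pts/dst_pts/rotated fields the dataset now emits. A hypothetical sketch of that inverse mapping; this helper does not exist in the commit:

import torch

def crop_to_frame(peaks, src_pts, dst_pts, rotated, crop_hw):
    """Map (N, 2) crop-space peaks back to frame coordinates (hypothetical helper)."""
    if rotated:  # undo the 180-degree flip applied when the head faced left
        peaks = torch.stack(
            [crop_hw[1] - peaks[:, 0], crop_hw[0] - peaks[:, 1]], dim=1
        )
    # Solve the inverse affine (dst -> src) from the same three point pairs.
    dst_h = torch.cat([dst_pts, torch.ones(3, 1)], dim=1)
    A_inv = torch.linalg.lstsq(dst_h, src_pts).solution.T  # (2, 3)
    peaks_h = torch.cat([peaks, torch.ones(peaks.shape[0], 1)], dim=1)
    return peaks_h @ A_inv.T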
