Add --config flag for simpler train CLI + fix crop device mismatch (#429)

talmo · claude · web-flow · commit 407861df7ee8 · 2026-01-20T07:52:09.000-08:00
## Summary

- **Simpler train CLI**: Add `--config` flag and positional argument support for `sleap-nn train`
- **Bug fix**: Fix device mismatch in `crop_bboxes` during top-down inference
- **Bug fix**: Fix inference progress ending at 99% instead of 100% in GUI mode

## New Train CLI Usage

```bash
# Positional config path (new!)
sleap-nn train path/to/config.yaml

# With --config flag (new!)
sleap-nn train --config path/to/config.yaml

# With Hydra overrides
sleap-nn train config.yaml trainer_config.max_epochs=100

# Legacy still works
sleap-nn train --config-dir /path/to/dir --config-name myrun
```

Also adds `rich-click` for styled CLI help output.

## Bug Fixes

### Device mismatch in crop_bboxes

Fixed `RuntimeError: indices should be either on cpu or on the same device as the indexed tensor` when bboxes tensor is on GPU but images are on CPU during top-down inference.

### Progress ends at 99%

Fixed inference progress bar ending at 99% instead of 100% in GUI mode. The throttled progress reporting (~4Hz) was skipping the final update when the last batch completed within 0.25s of the previous report.

## Test plan

- [x] `pytest tests/inference/test_peak_finding.py` - all pass
- [x] `pytest tests/inference/test_topdown.py` - all pass
- [x] `pytest tests/test_cli.py` - all pass
- [ ] Manual test of new CLI patterns
- [ ] Verify inference progress shows 100% in SLEAP GUI

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/pyproject.toml b/pyproject.toml
@@ -48,6 +48,7 @@ dependencies = [
     "jupyter",
     "jupyterlab",
     "pyzmq",
+    "rich-click>=1.9.5",
 ]
 dynamic = ["version", "readme"]
 
diff --git a/sleap_nn/cli.py b/sleap_nn/cli.py
@@ -1,6 +1,7 @@
-"""Unified CLI for SLEAP-NN using Click."""
+"""Unified CLI for SLEAP-NN using rich-click for styled output."""
 
-import click
+import rich_click as click
+from click import Command
 from loguru import logger
 from pathlib import Path
 from omegaconf import OmegaConf, DictConfig
@@ -13,7 +14,36 @@
 from sleap_nn import __version__
 import hydra
 import sys
-from click import Command
+
+# Rich-click configuration for styled help
+click.rich_click.TEXT_MARKUP = "markdown"
+click.rich_click.SHOW_ARGUMENTS = True
+click.rich_click.GROUP_ARGUMENTS_OPTIONS = True
+click.rich_click.STYLE_ERRORS_SUGGESTION = "magenta italic"
+click.rich_click.ERRORS_EPILOGUE = (
+    "Try 'sleap-nn [COMMAND] --help' for more information."
+)
+
+
+def is_config_path(arg: str) -> bool:
+    """Check if an argument looks like a config file path.
+
+    Returns True if the arg ends with .yaml or .yml.
+    """
+    return arg.endswith(".yaml") or arg.endswith(".yml")
+
+
+def split_config_path(config_path: str) -> tuple:
+    """Split a full config path into (config_dir, config_name).
+
+    Args:
+        config_path: Full path to a config file.
+
+    Returns:
+        Tuple of (config_dir, config_name) where config_dir is an absolute path.
+    """
+    path = Path(config_path).resolve()
+    return path.parent.as_posix(), path.name
 
 
 def print_version(ctx, param, value):
@@ -66,38 +96,77 @@ def cli():
 
 
 def show_training_help():
-    """Display training help information."""
-    help_text = """
-sleap-nn train — Train SLEAP models from a config YAML file.
-
-Usage:
-  sleap-nn train --config-dir <dir> --config-name <name> [overrides]
-
-Common overrides:
-  trainer_config.max_epochs=100
-  trainer_config.batch_size=32
-
-Examples:
-  Start new run:
-    sleap-nn train --config-dir /path/to/config_dir/ --config-name myrun
-  Resume 20 more epochs:
-    sleap-nn train --config-dir /path/to/config_dir/ --config-name myrun \\
-      trainer_config.resume_ckpt_path=<path/to/ckpt> \\
-      trainer_config.max_epochs=20
-
-Tips:
-  - Use -m/--multirun for sweeps; outputs go under hydra.sweep.dir.
-  - For Hydra flags and completion, use --hydra-help.
-
-For a detailed list of all available config options, please refer to https://nn.sleap.ai/config/.
+    """Display training help information with rich formatting."""
+    from rich.console import Console
+    from rich.panel import Panel
+    from rich.markdown import Markdown
+
+    console = Console()
+
+    help_md = """
+## Usage
+
+```
+sleap-nn train <config.yaml> [overrides]
+sleap-nn train --config <path/to/config.yaml> [overrides]
+```
+
+## Common Overrides
+
+| Override | Description |
+|----------|-------------|
+| `trainer_config.max_epochs=100` | Set maximum training epochs |
+| `trainer_config.batch_size=32` | Set batch size |
+| `trainer_config.save_ckpt=true` | Enable checkpoint saving |
+
+## Examples
+
+**Start a new training run:**
+```bash
+sleap-nn train path/to/config.yaml
+sleap-nn train --config path/to/config.yaml
+```
+
+**With overrides:**
+```bash
+sleap-nn train config.yaml trainer_config.max_epochs=100
+```
+
+**Resume training:**
+```bash
+sleap-nn train config.yaml trainer_config.resume_ckpt_path=/path/to/ckpt
+```
+
+**Legacy usage (still supported):**
+```bash
+sleap-nn train --config-dir /path/to/dir --config-name myrun
+```
+
+## Tips
+
+- Use `-m/--multirun` for sweeps; outputs go under `hydra.sweep.dir`
+- For Hydra flags and completion, use `--hydra-help`
+- Config documentation: https://nn.sleap.ai/config/
 """
-    click.echo(help_text)
+    console.print(
+        Panel(
+            Markdown(help_md),
+            title="[bold cyan]sleap-nn train[/bold cyan]",
+            subtitle="Train SLEAP models from a config YAML file",
+            border_style="cyan",
+        )
+    )
 
 
 @cli.command(cls=TrainCommand)
-@click.option("--config-name", "-c", type=str, help="Configuration file name")
 @click.option(
-    "--config-dir", "-d", type=str, default=".", help="Configuration directory path"
+    "--config",
+    type=str,
+    help="Path to configuration file (e.g., path/to/config.yaml)",
+)
+@click.option("--config-name", "-c", type=str, help="Configuration file name (legacy)")
+@click.option(
+    "--config-dir", "-d", type=str, default=".", help="Configuration directory (legacy)"
 )
 @click.option(
     "--video-paths",
@@ -130,25 +199,43 @@ def show_training_help():
     'Example: --prefix-map "/old/server/path" "/new/local/path"',
 )
 @click.argument("overrides", nargs=-1, type=click.UNPROCESSED)
-def train(config_name, config_dir, video_paths, video_path_map, prefix_map, overrides):
+def train(
+    config, config_name, config_dir, video_paths, video_path_map, prefix_map, overrides
+):
     """Run training workflow with Hydra config overrides.
 
     Examples:
-        sleap-nn train --config-name myconfig --config-dir /path/to/config_dir/
+        sleap-nn train path/to/config.yaml
+        sleap-nn train --config path/to/config.yaml trainer_config.max_epochs=100
         sleap-nn train -c myconfig -d /path/to/config_dir/ trainer_config.max_epochs=100
-        sleap-nn train -c myconfig -d /path/to/config_dir/ +experiment=new_model
     """
-    # Show help if no config name provided
-    if not config_name:
+    # Convert overrides to a mutable list
+    overrides = list(overrides)
+
+    # Check if the first positional arg is a config path (not a Hydra override)
+    config_from_positional = None
+    if overrides and is_config_path(overrides[0]):
+        config_from_positional = overrides.pop(0)
+
+    # Resolve config path with priority:
+    # 1. Positional config path (e.g., sleap-nn train config.yaml)
+    # 2. --config flag (e.g., sleap-nn train --config config.yaml)
+    # 3. Legacy --config-dir/--config-name flags
+    if config_from_positional:
+        config_dir, config_name = split_config_path(config_from_positional)
+    elif config:
+        config_dir, config_name = split_config_path(config)
+    elif config_name:
+        config_dir = Path(config_dir).resolve().as_posix()
+    else:
+        # No config provided - show help
         show_training_help()
         return
 
-    # Initialize Hydra manually
-    # resolve the path to the config directory (hydra expects absolute path)
-    config_dir = Path(config_dir).resolve().as_posix()
+    # Initialize Hydra manually (config_dir is already an absolute path)
     with hydra.initialize_config_dir(config_dir=config_dir, version_base=None):
         # Compose config with overrides
-        cfg = hydra.compose(config_name=config_name, overrides=list(overrides))
+        cfg = hydra.compose(config_name=config_name, overrides=overrides)
 
         # Validate config
         if not hasattr(cfg, "model_config") or not cfg.model_config:
diff --git a/sleap_nn/inference/peak_finding.py b/sleap_nn/inference/peak_finding.py
@@ -74,8 +74,10 @@ def crop_bboxes(
     # Get crop centers from bboxes.
     # The bbox top-left is at index 0, with (x, y) coordinates.
     # We need the center of the crop (peak location), which is top-left + half_size.
-    crop_x = (bboxes[:, 0, 0] + half_w).to(torch.long)
-    crop_y = (bboxes[:, 0, 1] + half_h).to(torch.long)
+    # Ensure bboxes are on the same device as images for index computation.
+    bboxes_on_device = bboxes.to(device)
+    crop_x = (bboxes_on_device[:, 0, 0] + half_w).to(torch.long)
+    crop_y = (bboxes_on_device[:, 0, 1] + half_h).to(torch.long)
 
     # Clamp indices to valid bounds to handle edge cases where centroids
     # might be at or beyond image boundaries.
@@ -86,7 +88,7 @@ def crop_bboxes(
     # Convert sample_inds to tensor if it's a list.
     if not isinstance(sample_inds, torch.Tensor):
         sample_inds = torch.tensor(sample_inds, device=device)
-    sample_inds_long = sample_inds.to(torch.long)
+    sample_inds_long = sample_inds.to(device=device, dtype=torch.long)
     crops = patches[sample_inds_long, :, crop_y, crop_x]
     # Shape: (n_crops, channels, height, width)
 
diff --git a/sleap_nn/inference/predictors.py b/sleap_nn/inference/predictors.py
@@ -567,6 +567,16 @@ def _predict_generator_gui(
                     print(json.dumps(progress_data), flush=True)
                     last_report = time()
 
+        # Final progress emit to ensure 100% is shown
+        elapsed = time() - start_time
+        progress_data = {
+            "n_processed": total_frames,
+            "n_total": total_frames,
+            "rate": round(frames_processed / elapsed, 1) if elapsed > 0 else 0,
+            "eta": 0,
+        }
+        print(json.dumps(progress_data), flush=True)
+
     def _predict_generator_rich(
         self, total_frames: int
     ) -> Iterator[Dict[str, np.ndarray]]:
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -56,7 +56,7 @@ def test_train_help(self):
         result = runner.invoke(cli, ["train", "--help"])
         assert result.exit_code == 0
         assert "sleap-nn train" in result.output
-        assert "Usage:" in result.output
+        assert "Usage" in result.output  # Rich-click renders ## Usage as header
         assert "sleap.ai" in result.output
 
     def test_train_no_config_shows_help(self):
@@ -65,7 +65,7 @@ def test_train_no_config_shows_help(self):
         result = runner.invoke(cli, ["train", "--config-dir", "."])
         assert result.exit_code == 0
         assert "sleap-nn train" in result.output
-        assert "Usage:" in result.output
+        assert "Usage" in result.output  # Rich-click renders ## Usage as header
 
 
 class TestSystemCommand:
@@ -138,9 +138,8 @@ def test_show_training_help_output(self, capsys):
         show_training_help()
         captured = capsys.readouterr()
         assert "sleap-nn train" in captured.out
-        assert "Usage:" in captured.out
-        assert "--config-dir" in captured.out
-        assert "--config-name" in captured.out
+        assert "Usage" in captured.out  # Rich-click renders ## Usage as header
+        assert "config.yaml" in captured.out  # New positional arg usage
         assert "sleap.ai" in captured.out
 
 
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,7 @@ dependencies = [`
`48`	`48`	`"jupyter",`
`49`	`49`	`"jupyterlab",`
`50`	`50`	`"pyzmq",`
	`51`	`+ "rich-click>=1.9.5",`
`51`	`52`	`]`
`52`	`53`	`dynamic = ["version", "readme"]`
`53`	`54`