
Commit c793ccd

gitttt-1234 and claude authored
Update default configuration values for improved training (#375)
## Summary

This PR updates several default configuration values across the codebase to provide better out-of-the-box training behavior and align with best practices for pose estimation training.

## Configuration Changes

### DataLoaderConfig
- **batch_size**: `1` → `4`
  - More efficient training with larger batch sizes
  - Better gradient estimates and faster convergence
  - Applies to both train and validation data loaders

### TrainerConfig
- **max_epochs**: `10` → `100`
  - Allows sufficient training time for proper convergence
  - A more appropriate default for pose estimation models
- **seed**: `0` → `None`
  - No default seeding, allowing natural randomization
  - Users can explicitly set a seed when reproducibility is needed

### DataConfig
- **use_augmentations_train**: `False` → `True`
  - Enables data augmentation by default
  - Improves model generalization and robustness
- **Removed conditional logic** in `data_mapper`
  - Previously auto-set `use_augmentations_train` based on the augmentation args
  - Now consistently defaults to `True` for cleaner behavior

### ModelConfig
- **ClassMapConfig sigma**: `15.0` → `5.0`
  - More precise class map generation for multi-class models
  - Consistent with the confmaps sigma defaults
  - Better localization accuracy

## Files Changed
- ✅ `sleap_nn/config/data_config.py` - Updated defaults and removed conditional logic
- ✅ `sleap_nn/config/model_config.py` - Updated ClassMapConfig sigma
- ✅ `sleap_nn/config/trainer_config.py` - Updated batch_size, max_epochs, seed, and all docstrings
- ✅ `tests/config/test_trainer_config.py` - Updated assertions for the new defaults

## Benefits
- 🎯 Better default training configurations out of the box
- 📈 Improved training efficiency with larger batch sizes
- 🔄 Data augmentation enabled by default for better generalization
- ⏱️ Sufficient epochs for proper model convergence
- 📝 Accurate documentation across all config classes

## Backwards Compatibility
These changes only affect default values. All existing configurations with explicit values will continue to work as before. Users can override any of these defaults through their config files (see the sketch below).

## Testing
- ✅ All configuration classes properly instantiate with new defaults
- ✅ Docstrings accurately reflect current values
- ✅ Linter passes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <[email protected]>
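For reference, a minimal sketch of pinning the pre-#375 values explicitly. The class and field names come from the diffs below, but the exact constructor signatures (attrs-generated keyword arguments) are an assumption:

```python
# Hedged sketch: restore the old defaults explicitly. Field names are taken
# from this PR's diffs; passing them as keyword arguments is assumed to work.
from omegaconf import OmegaConf

from sleap_nn.config.data_config import DataConfig
from sleap_nn.config.trainer_config import (
    TrainDataLoaderConfig,
    TrainerConfig,
    ValDataLoaderConfig,
)

trainer_cfg = TrainerConfig(
    train_data_loader=TrainDataLoaderConfig(batch_size=1),  # new default: 4
    val_data_loader=ValDataLoaderConfig(batch_size=1),      # new default: 4
    max_epochs=10,                                          # new default: 100
    seed=0,                                                 # new default: None
)
data_cfg = DataConfig(use_augmentations_train=False)        # new default: True

# Round-trip through OmegaConf, mirroring how the tests exercise the configs.
conf = OmegaConf.structured(trainer_cfg)
assert conf.train_data_loader.batch_size == 1
```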
1 parent b3432ef commit c793ccd

File tree: 4 files changed (+16 −16 lines)

sleap_nn/config/data_config.py

Lines changed: 2 additions & 5 deletions

```diff
@@ -165,7 +165,7 @@ class DataConfig:
         use_existing_imgs: (bool) Use existing train and val images/ chunks in the `cache_img_path` for `torch_dataset_cache_img_disk` frameworks. If `True`, the `cache_img_path` should have `train_imgs` and `val_imgs` dirs. *Default*: `False`.
         delete_cache_imgs_after_training: (bool) If `False`, the images (torch_dataset_cache_img_disk) are retained after training. Else, the files are deleted. *Default*: `True`.
         preprocessing: Configuration options related to data preprocessing.
-        use_augmentations_train: (bool) True if the data augmentation should be applied to the training data, else False. *Default*: `False`.
+        use_augmentations_train: (bool) True if the data augmentation should be applied to the training data, else False. *Default*: `True`.
         augmentation_config: Configurations related to augmentation. (only if `use_augmentations_train` is `True`)
         skeletons: skeleton configuration for the `.slp` file. This will be pulled from the train dataset and saved to the `training_config.yaml`
     """
@@ -181,7 +181,7 @@ class DataConfig:
     use_existing_imgs: bool = False
     delete_cache_imgs_after_training: bool = True
     preprocessing: PreprocessingConfig = field(factory=PreprocessingConfig)
-    use_augmentations_train: bool = False
+    use_augmentations_train: bool = True
     augmentation_config: Optional[AugmentationConfig] = None
     skeletons: Optional[list] = None
 
@@ -463,9 +463,6 @@ def data_mapper(legacy_config: dict) -> DataConfig:
         geometric=GeometricConfig(**geometric_args),
     )
 
-    data_cfg_args["use_augmentations_train"] = (
-        True if any(intensity_args.values()) or any(geometric_args.values()) else False
-    )
     data_cfg_args["skeletons"] = (
         skeletons_list
         if skeletons_list is not None and len(skeletons_list) > 0
```
sleap_nn/config/model_config.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -837,7 +837,7 @@ class ClassMapConfig:
     """
 
     classes: Optional[List[str]] = None
-    sigma: float = 15.0
+    sigma: float = 5.0
     output_stride: int = 1
     loss_weight: Optional[float] = None
 
```
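A quick check of the tightened default, assuming `ClassMapConfig` can be built standalone (the class names passed here are purely illustrative):

```python
from sleap_nn.config.model_config import ClassMapConfig

cmap = ClassMapConfig(classes=["animal_0", "animal_1"])  # hypothetical labels
assert cmap.sigma == 5.0        # was 15.0; now consistent with confmaps sigma
assert cmap.output_stride == 1  # unchanged
```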

sleap_nn/config/trainer_config.py

Lines changed: 7 additions & 7 deletions

```diff
@@ -17,12 +17,12 @@ class DataLoaderConfig:
     """Train DataLoaderConfig.
 
     Attributes:
-        batch_size: (int) Number of samples per batch or batch size for training/validation data. *Default*: `1`.
+        batch_size: (int) Number of samples per batch or batch size for training/validation data. *Default*: `4`.
         shuffle: (bool) True to have the data reshuffled at every epoch. *Default*: `False`.
         num_workers: (int) Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process. *Default*: `0`.
     """
 
-    batch_size: int = 1
+    batch_size: int = 4
     shuffle: bool = False
     num_workers: int = 0
 
@@ -32,7 +32,7 @@ class TrainDataLoaderConfig(DataLoaderConfig):
     """Train DataLoaderConfig.
 
     Attributes:
-        batch_size: (int) Number of samples per batch or batch size for training/validation data. *Default*: `1`.
+        batch_size: (int) Number of samples per batch or batch size for training/validation data. *Default*: `4`.
         shuffle: (bool) True to have the data reshuffled at every epoch. *Default*: `True`.
         num_workers: (int) Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process. *Default*: `0`.
     """
@@ -45,7 +45,7 @@ class ValDataLoaderConfig(DataLoaderConfig):
     """Validation DataLoaderConfig.
 
     Attributes:
-        batch_size: (int) Number of samples per batch or batch size for training/validation data. *Default*: `1`.
+        batch_size: (int) Number of samples per batch or batch size for training/validation data. *Default*: `4`.
         shuffle: (bool) True to have the data reshuffled at every epoch. *Default*: `False`.
         num_workers: (int) Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process. *Default*: `0`.
     """
@@ -243,8 +243,8 @@ class TrainerConfig:
         train_steps_per_epoch: (int) Number of minibatches (steps) to train for in an epoch. If set to `None`, this is set to the number of batches in the training data or `min_train_steps_per_epoch`, whichever is largest. *Default*: `None`. **Note**: In a multi-gpu training setup, the effective steps during training would be the `trainer_steps_per_epoch` / `trainer_devices`.
         visualize_preds_during_training: (bool) If set to `True`, sample predictions (keypoints + confidence maps) are saved to `viz` folder in the ckpt dir and in wandb table. *Default*: `False`.
         keep_viz: (bool) If set to `True`, the `viz` folder will be kept after training. If `False`, the `viz` folder will be deleted after training. Only applies when `visualize_preds_during_training` is `True`. *Default*: `False`.
-        max_epochs: (int) Maximum number of epochs to run. *Default*: `10`.
-        seed: (int) Seed value for the current experiment. If None, no seeding is applied. *Default*: `0`.
+        max_epochs: (int) Maximum number of epochs to run. *Default*: `100`.
+        seed: (int) Seed value for the current experiment. If None, no seeding is applied. *Default*: `None`.
         use_wandb: (bool) True to enable wandb logging. *Default*: `False`.
         save_ckpt: (bool) True to enable checkpointing. *Default*: `False`.
         ckpt_dir: (str) Directory path where the `<run_name>` folder is created. If `None`, a new folder for the current run is created in the working dir. **Default**: `None`
@@ -274,7 +274,7 @@ class TrainerConfig:
     train_steps_per_epoch: Optional[int] = None
     visualize_preds_during_training: bool = False
     keep_viz: bool = False
-    max_epochs: int = 10
+    max_epochs: int = 100
     seed: Optional[int] = None
     use_wandb: bool = False
     save_ckpt: bool = False
```
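Since `seed` now defaults to `None`, runs are no longer implicitly seeded and reproducibility becomes an explicit opt-in. A hedged sketch — how sleap-nn actually consumes `seed` is not shown in this diff, so the `lightning.seed_everything` call is an assumption:

```python
import lightning

from sleap_nn.config.trainer_config import TrainerConfig

cfg = TrainerConfig()
assert cfg.max_epochs == 100  # new default
assert cfg.seed is None       # no implicit seeding

repro_cfg = TrainerConfig(seed=42)  # opt in when reproducibility matters
if repro_cfg.seed is not None:
    lightning.seed_everything(repro_cfg.seed)  # assumed seeding entry point
```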

tests/config/test_trainer_config.py

Lines changed: 6 additions & 3 deletions

```diff
@@ -67,15 +67,15 @@ def test_dataloader_config():
     conf = OmegaConf.structured(TrainDataLoaderConfig)
     conf_instance = OmegaConf.structured(TrainDataLoaderConfig())
     assert conf == conf_instance
-    assert conf.batch_size == 1
+    assert conf.batch_size == 4
     assert conf.shuffle is True
     assert conf.num_workers == 0
 
     # Check default values
     conf = OmegaConf.structured(ValDataLoaderConfig)
     conf_instance = OmegaConf.structured(ValDataLoaderConfig())
     assert conf == conf_instance
-    assert conf.batch_size == 1
+    assert conf.batch_size == 4
     assert conf.shuffle is False
     assert conf.num_workers == 0
 
@@ -211,9 +211,12 @@ def test_trainer_config(caplog):
     conf_dict = asdict(conf)  # Convert to dict for OmegaConf
     conf_structured = OmegaConf.create(conf_dict)
 
-    assert conf_structured.train_data_loader.batch_size == 1
+    assert conf_structured.train_data_loader.batch_size == 4
+    assert conf_structured.val_data_loader.batch_size == 4
     assert conf_structured.val_data_loader.shuffle is False
     assert conf_structured.model_ckpt.save_top_k == 1
+    assert conf_structured.max_epochs == 100
+    assert conf_structured.seed is None
     assert conf_structured.optimizer.lr == 1e-4
     assert conf_structured.lr_scheduler is not None
     assert conf_structured.lr_scheduler.reduce_lr_on_plateau is not None
```
