
Commit e5d25a6

Merge branch 'main' into greg/map-old-sleap-config-files-to-new
2 parents: 64b53fc + 5c3a38d

39 files changed: +2225 additions, -1822 deletions

.DS_Store

6 KB (binary file not shown)

.dockerignore

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+README.md
+docs/
+*.egg-info/
+
+
+# Test artifacts
+tests/
+*.pytest_cache/
+
+*.ruff_cache
+codecov.yml
+
+# Git files
+.github/
+.gitignore

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
@@ -37,7 +37,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: 3.9
+          python-version: 3.11

       - name: Install dependencies
         run: |
@@ -57,7 +57,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-latest", "windows-latest", "macos-14"]
-        python: [3.9]
+        python: [3.11]
       include:
         # Default values
         - env_file: environment_cpu.yml

Dockerfile

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+## Docker image for remote development
+
+# Directly from a cuda built image.
+FROM nvidia/cuda:12.6.1-base-ubuntu24.04
+
+LABEL maintainer="Divya Seshadri Murali <[email protected]>"
+
+USER root
+
+RUN apt-get update && apt-get install -y --no-install-recommends build-essential openssh-server
+
+
+# use tini instead of init: useful esp. when using multi-processing, ssh, zombie processes
+ENV TINI_VERSION v0.19.0
+ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
+RUN chmod +x /tini
+# /tini -- python app.py
+ENTRYPOINT ["/tini", "--"]
+
+RUN mkdir /var/run/sshd
+RUN echo 'root:root' | chpasswd
+RUN sed -i 's/#*PermitRootLogin prohibit-password/PermitRootLogin yes/g' /etc/ssh/sshd_config
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed -i 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd
+
+# ENV NOTVISIBLE="in users profile"
+# RUN echo "export VISIBLE=now" >> /etc/profile
+
+EXPOSE 22
+CMD ["/usr/sbin/sshd", "-D"]
+
+
+# Install all necessary packages and remove apt cache.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    openssh-server \
+    wget \
+    curl \
+    git \
+    screen \
+    ffmpeg && \
+    rm -rf /var/lib/apt/lists/*
+
+
+# Install Miniforge
+RUN curl -fsSL --compressed https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -o "Miniforge3-Linux-x86_64.sh" && \
+    chmod +x "Miniforge3-Linux-x86_64.sh" && \
+    bash "Miniforge3-Linux-x86_64.sh" -b -p "/root/miniforge3" && \
+    rm "Miniforge3-Linux-x86_64.sh" && \
+    /root/miniforge3/bin/conda init bash && \
+    /root/miniforge3/bin/conda clean --all -y
+
+# Add conda to path to create new env
+ENV PATH "/root/miniforge3/bin:$PATH"
+
+
+# install conda env
+RUN mkdir sleap-nn/
+WORKDIR sleap-nn
+COPY . ./sleap-nn
+RUN mamba env create -f ./sleap-nn/environment.yml
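The new Dockerfile can be exercised with the usual build/run workflow. A minimal sketch, assuming the repo root as build context; the image tag `sleap-nn:dev`, container name, and host port are illustrative choices, not from the commit:

```shell
# Build the remote-development image (tag name is an assumption).
docker build -t sleap-nn:dev .

# Run with GPU access and SSH published on a host port.
# The image starts sshd under tini; root login uses the password
# "root" set via chpasswd in the Dockerfile.
docker run --gpus all -d -p 2222:22 --name sleap-nn-dev sleap-nn:dev

# Connect from the host for remote development:
ssh -p 2222 root@localhost
```

Note that because the `ENTRYPOINT`/`CMD` pair launches `sshd -D` in the foreground, the container stays alive purely as an SSH target; tini reaps any zombie processes spawned by SSH sessions.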

docs/config.md

Lines changed: 6 additions & 4 deletions
@@ -17,13 +17,13 @@ The config file has three main sections:
 - `val_labels_path`: (str) Path to validation data (`.slp` file)
 - `test_file_path`: (str) Path to test dataset (`.slp` file or `.mp4` file). *Note*: This is used only with CLI to get evaluation on test set after training is completed.
 - `user_instances_only`: (bool) `True` if only user labeled instances should be used for training. If `False`, both user labeled and predicted instances would be used. *Default*: `True`.
-- `data_pipeline_fw`: (str) Framework to create the data loaders. One of [`litdata`, `torch_dataset`, `torch_dataset_np_chunks`].
+- `data_pipeline_fw`: (str) Framework to create the data loaders. One of [`litdata`, `torch_dataset`, `torch_dataset_cache_img_memory`, `torch_dataset_cache_img_disk`].
   *Default*: `"torch_dataset"`.
-- `np_chunks_path`: (str) Path to save `.npz` chunks created with `torch_dataset_np_chunks` data pipeline framework. If `None`, the path provided in `trainer_config.save_ckpt` is used (else working dir is used). The `train_chunks` and `val_chunks` dirs are created inside this path. *Default*: `None`.
+- `cache_img_path`: (str) Path to save `.jpg` images created with `torch_dataset_cache_img_disk` data pipeline framework. If `None`, the path provided in `trainer_config.save_ckpt` is used (else working dir is used). The `train_imgs` and `val_imgs` dirs are created inside this path. *Default*: `None`.
 - `litdata_chunks_path`: (str) Path to save `.bin` files created with `litdata` data pipeline framework. If `None`, the path provided in `trainer_config.save_ckpt` is used (else working dir is used). The `train_chunks` and `val_chunks` dirs are created inside this path. *Default*: `None`.
-- `use_existing_chunks`: (bool) Use existing train and val chunks in the `np_chunks_path` or `chunks_path` for `torch_dataset_np_chunks` or `litdata` frameworks. If `True`, the `np_chunks_path` (or `chunks_path`) should have `train_chunks` and `val_chunks` dirs. *Default*: `False`.
+- `use_existing_imgs`: (bool) Use existing train and val images/chunks in the `cache_img_path` or `litdata_chunks_path` for `torch_dataset_cache_img_disk` or `litdata` frameworks. If `True`, the `cache_img_path` (or `litdata_chunks_path`) should have `train_imgs` and `val_imgs` dirs. *Default*: `False`.
 - `chunk_size`: (int) Size of each chunk (in MB). *Default*: `100`.
-- `delete_chunks_after_training`: (bool) If `False`, the chunks (numpy or litdata chunks) are retained after training. Else, the chunks are deleted. *Default*: `True`.
+- `delete_cache_imgs_after_training`: (bool) If `False`, the images (torch_dataset_cache_img_disk or litdata chunks) are retained after training. Else, the files are deleted. *Default*: `True`.
   #TODO: change in inference ckpts
 - `preprocessing`:
   - `is_rgb`: (bool) True if the image has 3 channels (RGB image). If input has only one
@@ -164,6 +164,8 @@ The config file has three main sections:
 - `save_last`: (bool) When True, saves a last.ckpt whenever a checkpoint file gets saved. On a local filesystem, this will be a symbolic link, and otherwise a copy of the checkpoint file. This allows accessing the latest checkpoint in a deterministic manner. *Default*: `False`.
 - `trainer_devices`: (int) Number of devices to train on (int), which devices to train on (list or str), or "auto" to select automatically. *Default*: `"auto"`.
 - `trainer_accelerator`: (str) One of the ("cpu", "gpu", "tpu", "ipu", "auto"). "auto" recognises the machine the model is running on and chooses the appropriate accelerator for the `Trainer` to be connected to. *Default*: `"auto"`.
+- `profiler`: (str) Profiler for pytorch Trainer. One of ["advanced", "passthrough", "pytorch", "simple"]. *Default*: `None`.
+- `trainer_strategy`: (str) Training strategy, one of ["auto", "ddp", "fsdp", "ddp_find_unused_parameters_false", "ddp_find_unused_parameters_true", ...]. This supports any training strategy that is supported by `lightning.Trainer`. *Default*: `"auto"`.
 - `enable_progress_bar`: (bool) When True, enables printing the logs during training.
   *Default*: `False`.
 - `steps_per_epoch`: (int) Minimum number of iterations in a single epoch. (Useful if model is trained with very few data points). Refer `limit_train_batches` parameter of Torch `Trainer`. If `None`, the number of iterations depends on the number of samples in the train dataset.
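The renamed keys documented above slot into a config file along these lines; a hedged sketch only, with illustrative paths and values that are not taken from the commit:

```yaml
data_config:
  data_pipeline_fw: torch_dataset_cache_img_disk
  cache_img_path: /tmp/sleap_cache        # train_imgs/ and val_imgs/ dirs created here
  use_existing_imgs: false                # set true to reuse a previously built cache
  delete_cache_imgs_after_training: true  # remove the .jpg cache when training ends

trainer_config:
  profiler: simple                        # one of advanced/passthrough/pytorch/simple
  trainer_strategy: auto                  # any strategy lightning.Trainer accepts
```

The main migration hazard is that old configs using `np_chunks_path`, `use_existing_chunks`, or `delete_chunks_after_training` must be rewritten to the new `cache_img_*` names, since the `torch_dataset_np_chunks` framework value no longer appears in the documented options.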

docs/config_bottomup.yaml

Lines changed: 2 additions & 0 deletions
@@ -93,6 +93,8 @@ trainer_config:
   save_last: true
   trainer_devices: 1
   trainer_accelerator: cpu
+  profiler: "simple"
+  trainer_strategy: auto
   enable_progress_bar: false
   steps_per_epoch: null
   max_epochs: 50

environment.yml

Lines changed: 2 additions & 2 deletions
@@ -6,10 +6,10 @@ channels:
   - conda-forge

 dependencies:
-  - python=3.9
+  - python=3.11
   - pytorch-cuda=11.8
   - numpy
-  - sleap-io
+  - sleap-io>=0.2.0
   - pydantic
   - lightning
   - cudnn

environment_cpu.yml

Lines changed: 2 additions & 2 deletions
@@ -5,9 +5,9 @@ channels:
   - conda-forge

 dependencies:
-  - python=3.9
+  - python=3.11
   - numpy
-  - sleap-io
+  - sleap-io>=0.2.0
   - pytorch
   - pydantic
   - lightning

environment_mac.yml

Lines changed: 2 additions & 2 deletions
@@ -5,9 +5,9 @@ channels:
   - conda-forge

 dependencies:
-  - python=3.9
+  - python=3.11
   - numpy
-  - sleap-io
+  - sleap-io>=0.2.0
   - pydantic
   - lightning
   - pytorch

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -12,11 +12,11 @@ authors = [
     {name = "Talmo Pereira", email = "[email protected]"}
 ]
 description = "Neural network backend for training and inference for animal pose estimation."
-requires-python = ">=3.9"
+requires-python = ">=3.11"
 keywords = ["sleap", "pose estimation", "deep learning", "neural networks", "computer vision", "animal behavior"]
 license = {text = "BSD-3-Clause"}
 classifiers = [
-    "Programming Language :: Python :: 3.9"
+    "Programming Language :: Python :: 3.11"
 ]
 dependencies = [
     "torch",
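The bump from `>=3.9` to `>=3.11` means older interpreters are refused at install time. A small illustrative helper (not part of the commit; the function name and plain tuple comparison are simplifying assumptions, since pip actually evaluates the full `requires-python` specifier) shows the version check this effectively amounts to:

```python
REQUIRED = (3, 11)  # floor implied by requires-python = ">=3.11"

def meets_floor(version, required=REQUIRED):
    """Return True if a (major, minor, ...) tuple satisfies the version floor."""
    return tuple(version[:2]) >= required

print(meets_floor((3, 9)))   # False: 3.9 installs are now rejected
print(meets_floor((3, 11)))  # True
print(meets_floor((3, 12)))  # True
```

In practice this means CI (now pinned to 3.11 in `ci.yml`), the conda environments (`python=3.11`), and the packaging metadata all move in lockstep, so no supported entry point remains on 3.9.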

0 commit comments

Comments
 (0)