Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
d1df4a3
Merge branch 'main' into dev
kozlov721 Jul 5, 2024
06345f1
Merge branch 'dev' of github.com:luxonis/luxonis-ml into dev
kozlov721 Jul 11, 2024
dda0426
Merge branch 'dev' of github.com:luxonis/luxonis-ml into dev
kozlov721 Jul 11, 2024
563a828
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Oct 22, 2024
ce1edf2
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Nov 4, 2024
68aa1e9
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Nov 5, 2024
f92fb1f
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Nov 7, 2024
f4853cb
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Nov 25, 2024
772f16b
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Nov 27, 2024
779650e
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Dec 4, 2024
f1608a9
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Dec 4, 2024
0f4156f
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Jan 22, 2025
de69f48
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Jan 22, 2025
47dcfd5
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Jan 29, 2025
03a1dae
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Feb 25, 2025
4db02d5
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Jun 23, 2025
ca8c065
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Jul 1, 2025
2be3c5d
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Jul 11, 2025
f7a8168
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Jul 18, 2025
3b48efe
feat: add class reordering to the loader
sokovninn Jul 20, 2025
c5b73ee
fix: no overwriting of metadata
sokovninn Jul 21, 2025
9559de2
Merge branch 'main' of https://github.com/luxonis/luxonis-ml into main
sokovninn Jul 21, 2025
cc16c05
Merge branch 'main' into feat/class-reordering-in-loader
sokovninn Jul 21, 2025
989d19f
feat: add class reordering to LuxonisDataset
sokovninn Jul 25, 2025
5b52a33
Merge branch 'main' into feat/class-reordering-in-loader
sokovninn Jul 25, 2025
ac53cc2
docs: add set_class_order_per_task() to the dataset docs
sokovninn Jul 29, 2025
70d777d
style: md formatting
sokovninn Jul 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions luxonis_ml/data/datasets/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,22 @@ The `push_to_cloud()` method is used to upload a local dataset to the specified
| `update_mode` | `UpdateMode` | `UpdateMode.MISSING` | Whether to always push (overwrite) the dataset’s media folder to the cloud or only upload missing files. |
| `bucket_storage` | `BucketStorage` | Required | The cloud storage destination to which local media files should be uploaded (e.g., GCS, S3, Azure). |

### Setting Class Order per Task

The `set_class_order_per_task()` method allows you to define a specific ordering of classes for one or more tasks, without rewriting the dataset’s metadata.

#### Parameters

| Parameter | Type | Default | Description |
| ---------------------- | ---------------------- | -------- | ------------------------------------------------------------------------------------------------------------ |
| `class_order_per_task` | `dict[str, list[str]]` | Required | Mapping of task names to ordered lists of class names. Class names must exactly match the dataset’s classes. |

#### Persistence & Usage Notes

- **View-only ordering**: This method does *not* rewrite the dataset’s stored metadata: it calls `set_classes()` internally with `rewrite_metadata=False`, so the new class order is applied only to the in-memory dataset object.
- **New classes**: If new classes are added to the dataset, you must call `set_class_order_per_task()` again to include and order them.
- **Loader initialization**: For `LuxonisLoader`, apply class ordering *before* passing the dataset into the loader to avoid unintended reordering during loader setup.

## In-Depth Explanation of luxonis-ml Dataset Storage

### File Structure
Expand Down
50 changes: 49 additions & 1 deletion luxonis_ml/data/datasets/luxonis_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,7 @@ def set_classes(
self,
classes: list[str] | dict[str, int],
task: str | None = None,
rewrite_metadata: bool = True,
) -> None:
if task is None:
tasks = self.get_task_names()
Expand All @@ -753,7 +754,8 @@ def set_classes(
for t in tasks:
self._metadata.set_classes(classes, t)

self._write_metadata()
if rewrite_metadata:
self._write_metadata()

@override
def get_classes(self) -> dict[str, dict[str, int]]:
Expand Down Expand Up @@ -1837,3 +1839,49 @@ def remove_duplicates(self) -> None:
logger.info(
"Successfully removed duplicate files and annotations from the dataset."
)

def set_class_order_per_task(
    self, class_order_per_task: dict[str, list[str]]
) -> None:
    """Sets the class order for the provided tasks.

    All entries are validated before anything is changed, so an
    invalid entry never leaves the dataset partially reordered. The
    new order is applied through C{set_classes} with
    C{rewrite_metadata=False}, i.e. the metadata is not written out
    by this method.

    @type class_order_per_task: dict[str, list[str]]
    @param class_order_per_task: A dictionary mapping task names to
        a list of class names. Each list must contain every class of
        the respective task exactly once.
    @raises ValueError: If a task name is not found in the dataset
        tasks, if the provided class names do not match the
        dataset's classes, or if a class name is listed more than
        once.
    """
    tasks = self.get_tasks()
    classes_per_task = self.get_classes()

    # First pass: validate everything up front so that a failure on a
    # later task cannot leave earlier tasks already reordered.
    for task_name, task_classes in class_order_per_task.items():
        if task_name not in tasks:
            raise ValueError(
                f"Task {task_name} not found in dataset tasks. "
                f"Available tasks: {list(tasks.keys())}"
            )

        expected = set(classes_per_task[task_name].keys())
        provided = set(task_classes)
        if provided != expected:
            raise ValueError(
                f"Classes for task {task_name} do not match "
                f"the classes in the dataset. "
                f"Expected: {expected}, "
                f"Got: {provided}."
            )

        # Set equality alone would accept repeated names, which would
        # then produce gaps / wrong indices in the enumerate() below.
        if len(task_classes) != len(provided):
            duplicates = sorted(
                {name for name in task_classes if task_classes.count(name) > 1}
            )
            raise ValueError(
                f"Classes for task {task_name} contain duplicate "
                f"entries: {duplicates}."
            )

    # Second pass: apply the (now known to be valid) orderings.
    for task_name, task_classes in class_order_per_task.items():
        current_classes = list(classes_per_task[task_name].keys())
        if task_classes != current_classes:
            logger.warning(
                f"Reordering classes for task {task_name}. "
                f"Original order: {current_classes}, "
                f"New order: {task_classes}."
            )

        self.set_classes(
            classes={
                class_name: i for i, class_name in enumerate(task_classes)
            },
            task=task_name,
            rewrite_metadata=False,
        )
1 change: 1 addition & 0 deletions luxonis_ml/data/loaders/luxonis_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def __init__(
self.width = width

self.dataset = dataset

self.sync_mode = self.dataset.is_remote
self.keep_categorical_as_strings = keep_categorical_as_strings
self.filter_task_names = filter_task_names
Expand Down
154 changes: 154 additions & 0 deletions tests/test_data/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1118,3 +1118,157 @@ def generator(start: int, end: int) -> DatasetIterator:
)
== 6
)


def create_test_dataset_with_classes(
    tempdir: Path, task_classes: dict[str, dict[str, int]]
) -> LuxonisDataset:
    """Build a five-image test dataset and apply the given class
    mappings.

    Every record is emitted under the C{"classification"} task; its
    class label cycles through the keys of
    C{task_classes["classification"]}. After the records are added,
    each mapping in C{task_classes} is applied with C{set_classes} and
    all data is assigned to the first split.
    """

    def generator() -> DatasetIterator:
        for idx in range(5):
            image_path = create_image(idx, tempdir)
            class_names = list(task_classes["classification"].keys())
            label = class_names[idx % len(task_classes["classification"])]
            bbox = {
                "x": 0.1 + idx * 0.1,
                "y": 0.1 + idx * 0.1,
                "w": 0.2,
                "h": 0.2,
            }
            yield {
                "file": image_path,
                "annotation": {"class": label, "boundingbox": bbox},
                "task_name": "classification",
            }

    empty_dataset = LuxonisDataset(
        "test_class_order",
        delete_local=True,
        delete_remote=True,
        bucket_storage=BucketStorage.LOCAL,
    )
    dataset = empty_dataset.add(generator())

    # Apply the requested class -> index mapping for each task.
    for name, mapping in task_classes.items():
        dataset.set_classes(mapping, task=name)

    dataset.make_splits(ratios=(1, 0, 0))
    return dataset


def test_class_order_per_task_valid_reordering(tempdir: Path):
    """A permutation of the existing classes is accepted and the class
    indices follow the new order."""
    dataset = create_test_dataset_with_classes(
        tempdir, {"classification": {"cat": 0, "dog": 1, "bird": 2}}
    )

    # Same class names as the dataset, in a different order.
    dataset.set_class_order_per_task(
        {"classification": ["dog", "bird", "cat"]}
    )

    reordered = dataset.get_classes()["classification"]
    assert reordered == {"dog": 0, "bird": 1, "cat": 2}


def test_class_order_per_task_multiple_tasks(tempdir: Path):
    """Test class reordering for multiple tasks."""
    # The keys are the task names the generator below actually emits, so
    # that set_classes() configures tasks that exist in the dataset.
    original_classes = {
        "classification": {"cat": 0, "dog": 1, "bird": 2},
        "classification_1": {"person": 0, "car": 1, "bike": 2},
    }

    # Create a more complex dataset with multiple tasks
    def generator() -> DatasetIterator:
        for i in range(5):
            img = create_image(i, tempdir)
            yield {
                "file": img,
                "annotation": {
                    "class": list(original_classes["classification"].keys())[
                        i % 3
                    ],
                },
                "task_name": "classification",
            }

        for i in range(5, 10):
            img = create_image(i, tempdir)
            yield {
                "file": img,
                "annotation": {
                    "class": list(
                        original_classes["classification_1"].keys()
                    )[(i - 5) % 3],
                },
                "task_name": "classification_1",
            }

    dataset = LuxonisDataset(
        "test_multi_task_class_order",
        delete_local=True,
        delete_remote=True,
        bucket_storage=BucketStorage.LOCAL,
    ).add(generator())

    for task_name, classes in original_classes.items():
        dataset.set_classes(classes, task=task_name)

    dataset.make_splits(ratios=(1, 0, 0))

    # Define new class orders for both tasks
    class_order_per_task = {
        "classification": ["bird", "cat", "dog"],
        "classification_1": ["bike", "person", "car"],
    }

    dataset.set_class_order_per_task(class_order_per_task)

    # Verify both tasks were reordered correctly
    expected_classification = {"bird": 0, "cat": 1, "dog": 2}
    expected_classification_1 = {"bike": 0, "person": 1, "car": 2}

    assert dataset.get_classes()["classification"] == expected_classification
    assert (
        dataset.get_classes()["classification_1"] == expected_classification_1
    )


def test_class_order_per_task_invalid_task_name(tempdir: Path):
    """An unknown task name is rejected with a ValueError that lists the
    available tasks."""
    dataset = create_test_dataset_with_classes(
        tempdir, {"classification": {"cat": 0, "dog": 1, "bird": 2}}
    )

    # "invalid_task" does not exist in the dataset.
    bad_order = {"invalid_task": ["cat", "dog", "bird"]}
    expected_message = r"Task invalid_task not found in dataset tasks\. Available tasks: \['classification'\]"

    with pytest.raises(ValueError, match=expected_message):
        dataset.set_class_order_per_task(bad_order)


def test_class_order_per_task_mismatched_classes(tempdir: Path):
    """A class list that differs from the dataset's classes is rejected
    with a ValueError."""
    dataset = create_test_dataset_with_classes(
        tempdir, {"classification": {"cat": 0, "dog": 1, "bird": 2}}
    )

    # "fish" is not one of the dataset's classes (and "bird" is missing).
    wrong_names = ["cat", "dog", "fish"]
    expected_message = r"Classes for task classification do not match the classes in the dataset."

    with pytest.raises(ValueError, match=expected_message):
        dataset.set_class_order_per_task({"classification": wrong_names})
Loading