Skip to content

Commit 6bcbb11

Browse files
authored
LuxonisParser - RoboFlow URL Support (#189)
1 parent 4fa0a02 commit 6bcbb11

File tree

6 files changed

+92
-17
lines changed

6 files changed

+92
-17
lines changed

.github/workflows/ci.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ jobs:
4141
with:
4242
ref: ${{ github.head_ref }}
4343

44+
- name: Install pre-commit
45+
run: python3 -m pip install 'pre-commit<4.0.0'
46+
4447
- name: Run pre-commit
4548
uses: pre-commit/[email protected]
4649

luxonis_ml/data/parsers/luxonis_parser.py

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import zipfile
33
from enum import Enum
4+
from importlib.util import find_spec
45
from pathlib import Path
56
from typing import (
67
Dict,
@@ -16,7 +17,8 @@
1617
from luxonis_ml.data import DATASETS_REGISTRY, BaseDataset, LuxonisDataset
1718
from luxonis_ml.data.utils.enums import LabelType
1819
from luxonis_ml.enums import DatasetType
19-
from luxonis_ml.utils import LuxonisFileSystem
20+
from luxonis_ml.utils import LuxonisFileSystem, environ
21+
from luxonis_ml.utils.filesystem import _pip_install
2022

2123
from .base_parser import BaseParser
2224
from .classification_directory_parser import ClassificationDirectoryParser
@@ -72,8 +74,15 @@ def __init__(
7274
appropriate parser.
7375
7476
@type dataset_dir: str
75-
@param dataset_dir: Path to the dataset directory or zip file.
76-
Can also be a remote URL supported by L{LuxonisFileSystem}.
77+
@param dataset_dir: Identifier of the dataset directory.
78+
Can be one of:
79+
- Local path to the dataset directory.
80+
- Remote URL supported by L{LuxonisFileSystem}.
81+
- C{gcs://} for Google Cloud Storage
82+
- C{s3://} for Amazon S3
83+
- C{roboflow://} for Roboflow datasets.
84+
- Expected format: C{roboflow://workspace/project/version/format}.
85+
Can be a remote URL supported by L{LuxonisFileSystem}.
7786
@type dataset_name: Optional[str]
7887
@param dataset_name: Name of the dataset. If C{None}, the name
7988
is derived from the name of the dataset directory.
@@ -97,9 +106,16 @@ def __init__(
97106
names.
98107
"""
99108
save_dir = Path(save_dir) if save_dir else None
100-
name = Path(dataset_dir).name
101-
local_path = (save_dir or Path.cwd()) / name
102-
self.dataset_dir = LuxonisFileSystem.download(dataset_dir, local_path)
109+
if dataset_dir.startswith("roboflow://"):
110+
self.dataset_dir, name = self._download_roboflow_dataset(
111+
dataset_dir, save_dir
112+
)
113+
else:
114+
name = dataset_dir.split("/")[-1]
115+
local_path = (save_dir or Path.cwd()) / name
116+
self.dataset_dir = LuxonisFileSystem.download(
117+
dataset_dir, local_path
118+
)
103119
if self.dataset_dir.suffix == ".zip":
104120
with zipfile.ZipFile(self.dataset_dir, "r") as zip_ref:
105121
unzip_dir = self.dataset_dir.parent / self.dataset_dir.stem
@@ -237,3 +253,43 @@ def _parse_split(
237253
return self.parser.parse_split(
238254
split, random_split, split_ratios, **parsed_kwargs, **kwargs
239255
)
256+
257+
def _download_roboflow_dataset(
    self, dataset_dir: str, local_path: Optional[Path]
) -> Tuple[Path, str]:
    """Downloads a dataset from Roboflow.

    The URL and the API key are validated before the C{roboflow}
    package is installed or imported, so malformed input fails fast
    without side effects.

    @type dataset_dir: str
    @param dataset_dir: Roboflow dataset identifier. Expected format:
        C{roboflow://workspace/project/version/format}.
    @type local_path: Optional[Path]
    @param local_path: Directory to download into. If C{None},
        C{<cwd>/<project>_<format>} is used. The dataset is placed in
        a C{<project>} subdirectory of that directory.
    @rtype: Tuple[Path, str]
    @return: Tuple of the dataset location reported by the Roboflow
        client and the project name.
    @raise ValueError: If the URL does not consist of exactly four
        non-empty components or if the version is not an integer.
    @raise RuntimeError: If C{ROBOFLOW_API_KEY} is not set.
    """
    # ``partition`` never raises, so a string without the prefix ends up
    # in the "incorrect URL" branch below instead of an ``IndexError``.
    _, _, spec = dataset_dir.partition("roboflow://")
    parts = spec.split("/")
    # Reject empty components too (e.g. ``roboflow://ws//1/coco``).
    if len(parts) != 4 or not all(parts):
        raise ValueError(
            f"Incorrect Roboflow dataset URL: `{dataset_dir}`. "
            "Expected format: `roboflow://workspace/project/version/format`."
        )
    workspace, project, version_str, export_format = parts
    try:
        version = int(version_str)
    except ValueError as e:
        raise ValueError(
            f"Roboflow version must be an integer, got `{version_str}`."
        ) from e

    if environ.ROBOFLOW_API_KEY is None:
        raise RuntimeError(
            "ROBOFLOW_API_KEY environment variable is not set. "
            "Please set it to your Roboflow API key."
        )

    # Only install the optional dependency once we know it can be used.
    if find_spec("roboflow") is None:
        _pip_install("roboflow", "roboflow", "0.1.1")

    from roboflow import Roboflow

    rf = Roboflow(api_key=environ.ROBOFLOW_API_KEY)

    local_path = local_path or Path.cwd() / f"{project}_{export_format}"
    dataset = (
        rf.workspace(workspace)
        .project(project)
        .version(version)
        .download(export_format, str(local_path / project))
    )
    return Path(dataset.location), project

luxonis_ml/data/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ pycocotools>=2.0.7
1212
typeguard>=4.1.0
1313
polars[timezone]>=0.20.31
1414
ordered-set>=4.0.0
15+
# roboflow>=0.1.1

luxonis_ml/utils/environ.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ class Environ(BaseSettings):
3434
LUXONISML_BASE_PATH: Path = Path.home() / "luxonis_ml"
3535
LUXONISML_TEAM_ID: str = "offline"
3636

37+
ROBOFLOW_API_KEY: Optional[str] = None
38+
3739
GOOGLE_APPLICATION_CREDENTIALS: Optional[str] = None
3840

3941
LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = (

luxonis_ml/utils/filesystem.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -674,19 +674,12 @@ def upload(local_path: PathType, url: str) -> None:
674674

675675

676676
def _check_package_installed(protocol: str) -> None: # pragma: no cover
677-
def _pip_install(package: str, version: str) -> None:
678-
logger.error(f"{package} is necessary for {protocol} protocol.")
679-
logger.info(f"Installing {package}...")
680-
subprocess.run(
681-
[sys.executable, "-m", "pip", "install", f"{package}>={version}"]
682-
)
683-
684677
if protocol in ["gs", "gcs"] and find_spec("gcsfs") is None:
685-
_pip_install("gcsfs", "2023.3.0")
678+
_pip_install(protocol, "gcsfs", "2023.3.0")
686679
elif protocol == "s3" and find_spec("s3fs") is None:
687-
_pip_install("s3fs", "2023.3.0")
680+
_pip_install(protocol, "s3fs", "2023.3.0")
688681
elif protocol == "mlflow" and find_spec("mlflow") is None:
689-
_pip_install("mlflow", "2.10.0")
682+
_pip_install(protocol, "mlflow", "2.10.0")
690683

691684

692685
def _get_protocol_and_path(path: str) -> Tuple[str, Optional[str]]:
@@ -702,3 +695,11 @@ def _get_protocol_and_path(path: str) -> Tuple[str, Optional[str]]:
702695
protocol = "file"
703696

704697
return protocol, path if path else None
698+
699+
700+
def _pip_install(protocol: str, package: str, version: str) -> None:
    """Installs a missing optional dependency using pip.

    The installation runs in a subprocess using the current interpreter
    (C{sys.executable -m pip install}).

    @type protocol: str
    @param protocol: Name of the protocol (without C{://}) that needs
        the package. Used only in the log message.
    @type package: str
    @param package: Name of the package to install.
    @type version: str
    @param version: Minimum version; the requirement passed to pip is
        C{<package>>=<version>}.
    """
    logger.error(f"'{package}' is necessary for '{protocol}://' protocol.")
    logger.info(f"Installing {package}...")
    result = subprocess.run(
        [sys.executable, "-m", "pip", "install", f"{package}>={version}"]
    )
    # Surface pip failures here; otherwise the first visible symptom is
    # an import error raised later, far from the actual cause.
    if result.returncode != 0:
        logger.error(
            f"Failed to install '{package}>={version}' "
            f"(pip exited with code {result.returncode})."
        )

tests/test_data/test_parsers.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from luxonis_ml.data import LabelType, LuxonisLoader, LuxonisParser
66
from luxonis_ml.enums import DatasetType
7+
from luxonis_ml.utils import environ
78

89
URL_PREFIX: Final[str] = "gs://luxonis-test-bucket/luxonis-ml-test-data"
910
WORK_DIR: Final[str] = "tests/data/parser_datasets"
@@ -82,13 +83,24 @@ def prepare_dir():
8283
"D1_ParkingSlot-solo.zip",
8384
[LabelType.BOUNDINGBOX, LabelType.SEGMENTATION],
8485
),
86+
(
87+
DatasetType.COCO,
88+
"roboflow://team-roboflow/coco-128/2/coco",
89+
[LabelType.BOUNDINGBOX, LabelType.CLASSIFICATION],
90+
),
8591
],
8692
)
8793
def test_dir_parser(
8894
dataset_type: DatasetType, url: str, expected_label_types: List[LabelType]
8995
):
96+
if not url.startswith("roboflow://"):
97+
url = f"{URL_PREFIX}/{url}"
98+
99+
elif environ.ROBOFLOW_API_KEY is None:
100+
pytest.skip("Roboflow API key is not set")
101+
90102
parser = LuxonisParser(
91-
f"{URL_PREFIX}/{url}",
103+
url,
92104
dataset_name=f"test-{dataset_type}",
93105
delete_existing=True,
94106
save_dir=WORK_DIR,

0 commit comments

Comments
 (0)