1313 overload ,
1414)
1515
16+ from roboflow import Roboflow
17+
1618from luxonis_ml .data import DATASETS_REGISTRY , BaseDataset , LuxonisDataset
1719from luxonis_ml .data .utils .enums import LabelType
1820from luxonis_ml .enums import DatasetType
19- from luxonis_ml .utils import LuxonisFileSystem
21+ from luxonis_ml .utils import LuxonisFileSystem , environ
2022
2123from .base_parser import BaseParser
2224from .classification_directory_parser import ClassificationDirectoryParser
@@ -72,8 +74,15 @@ def __init__(
7274 appropriate parser.
7375
7476 @type dataset_dir: str
75- @param dataset_dir: Path to the dataset directory or zip file.
76- Can also be a remote URL supported by L{LuxonisFileSystem}.
77+ @param dataset_dir: Identifier of the dataset directory.
78+ Can be one of:
79+ - Local path to the dataset directory or zip file.
80+ - Remote URL supported by L{LuxonisFileSystem}:
81+ - C{gcs://} for Google Cloud Storage.
82+ - C{s3://} for Amazon S3.
83+ - C{roboflow://} for Roboflow datasets.
84+ - Expected format: C{roboflow://workspace/project/version/format}.
7786 @type dataset_name: Optional[str]
7887 @param dataset_name: Name of the dataset. If C{None}, the name
7988 is derived from the name of the dataset directory.
@@ -97,9 +106,16 @@ def __init__(
97106 names.
98107 """
99108 save_dir = Path (save_dir ) if save_dir else None
100- name = Path (dataset_dir ).name
101- local_path = (save_dir or Path .cwd ()) / name
102- self .dataset_dir = LuxonisFileSystem .download (dataset_dir , local_path )
109+ name = dataset_dir .split ("/" )[- 1 ]
110+ if dataset_dir .startswith ("roboflow://" ):
111+ self .dataset_dir = self ._download_roboflow_dataset (
112+ dataset_dir , save_dir
113+ )
114+ else :
115+ local_path = (save_dir or Path .cwd ()) / name
116+ self .dataset_dir = LuxonisFileSystem .download (
117+ dataset_dir , local_path
118+ )
103119 if self .dataset_dir .suffix == ".zip" :
104120 with zipfile .ZipFile (self .dataset_dir , "r" ) as zip_ref :
105121 unzip_dir = self .dataset_dir .parent / self .dataset_dir .stem
@@ -237,3 +253,33 @@ def _parse_split(
237253 return self .parser .parse_split (
238254 split , random_split , split_ratios , ** parsed_kwargs , ** kwargs
239255 )
256+
257+ def _download_roboflow_dataset (
258+ self , dataset_dir : str , local_path : Optional [Path ]
259+ ) -> Path :
260+ rf = Roboflow (api_key = environ .ROBOFLOW_API_KEY )
261+ parts = dataset_dir .split ("roboflow://" )[1 ].split ("/" )
262+ if len (parts ) != 4 :
263+ raise ValueError (
264+ f"Incorrect Roboflow dataset URL: `{ dataset_dir } `. "
265+ "Expected format: `roboflow://workspace/project/version/format`."
266+ )
267+ workspace , project , version , format = dataset_dir .split ("roboflow://" )[
268+ 1
269+ ].split ("/" )
270+ try :
271+ version = int (version )
272+ except ValueError as e :
273+ raise ValueError (
274+ f"Roboflow version must be an integer, got `{ version } `."
275+ ) from e
276+
277+ local_path = local_path or Path .cwd () / f"{ project } _{ format } "
278+ dataset = (
279+ rf .workspace (workspace )
280+ .project (project )
281+ .version (int (version ))
282+ .download (format , str (local_path ))
283+ )
284+ print (dataset .location )
285+ return Path (dataset .location )
0 commit comments