Skip to content

Commit ca80fe8

Browse files
authored
implement DatasetDict (#296)
* add utils.hydra.resolve_target()
* implement DatasetDict
* add documentation to methods
* rename from_hf_dataset to from_hf and allow HF Dataset or HF IterableDataset as input
* improve documentation
* fix tests
1 parent 3dcbb4c commit ca80fe8

File tree

9 files changed

+1129
-6
lines changed

9 files changed

+1129
-6
lines changed

src/pytorch_ie/data/__init__.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,8 @@
1-
from typing import Dict, Union
2-
3-
from datasets import Split
4-
51
from .builder import GeneratorBasedBuilder
62
from .dataset import Dataset, IterableDataset
3+
from .dataset_dict import DatasetDict
74
from .dataset_formatter import DocumentFormatter
85

9-
DatasetDict = Dict[Union[str, Split], Dataset]
10-
116
__all__ = [
127
"GeneratorBasedBuilder",
138
"Dataset",

src/pytorch_ie/data/common.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Optional, Union
3+
4+
from .dataset import Dataset, IterableDataset
5+
6+
7+
class EnterDatasetMixin(ABC):
8+
"""Abstract mixin for processors that enter a dataset context; subclasses must implement ``enter_dataset``."""
9+
10+
@abstractmethod
11+
def enter_dataset(
12+
self, dataset: Union[Dataset, IterableDataset], name: Optional[str] = None
13+
) -> None:
14+
"""Enter the context of ``dataset``; ``name`` is an optional label for it (presumably the split name -- confirm)."""
15+
16+
17+
class ExitDatasetMixin(ABC):
18+
"""Abstract mixin for processors that exit a dataset context; subclasses must implement ``exit_dataset``."""
19+
20+
@abstractmethod
21+
def exit_dataset(
22+
self, dataset: Union[Dataset, IterableDataset], name: Optional[str] = None
23+
) -> None:
24+
"""Exit the context of ``dataset``; ``name`` is an optional label for it (presumably the split name -- confirm)."""
25+
26+
27+
class EnterDatasetDictMixin(ABC):
28+
"""Abstract mixin for processors that enter a dataset dict context; subclasses must implement ``enter_dataset_dict``."""
29+
30+
@abstractmethod
31+
def enter_dataset_dict(self, dataset_dict) -> None:
32+
"""Enter the context of ``dataset_dict`` (unannotated here; presumably a DatasetDict -- confirm)."""
33+
34+
35+
class ExitDatasetDictMixin(ABC):
36+
"""Abstract mixin for processors that exit a dataset dict context; subclasses must implement ``exit_dataset_dict``."""
37+
38+
@abstractmethod
39+
def exit_dataset_dict(self, dataset_dict) -> None:
40+
"""Exit the context of ``dataset_dict`` (unannotated here; presumably a DatasetDict -- confirm)."""

0 commit comments

Comments
 (0)