File tree 2 files changed +26
-0
lines changed
2 files changed +26
-0
lines changed Original file line number Diff line number Diff line change
1
+ from .load import load_dataset
2
+ __all__ = ['load_dataset' ]
Original file line number Diff line number Diff line change
1
+ import datasets
2
+ import os
3
+
4
+
5
def load_dataset(**kargs):
    """Load the cached translation splits from the fixed iwslt14.de-en directory.

    Reads the files ``train.de_en``, ``valid.de_en`` and ``test.de_en`` with
    the ``json`` builder of the ``datasets`` library.

    Args:
        **kargs: Accepted for call compatibility; not used by this function.

    Returns:
        A ``datasets.DatasetDict`` with the keys ``train``, ``val`` and
        ``test``, each holding the corresponding loaded split.
    """
    data_root = '/cognitive_comp/common_data/translation/iwslt14.de-en/'
    train_file = os.path.join(data_root, "train.de_en")
    valid_file = os.path.join(data_root, "valid.de_en")
    test_file = os.path.join(data_root, "test.de_en")

    # The training file is handed over as a bare path; the validation and
    # test files are keyed by their split so they can be selected by name.
    train_split = datasets.load_dataset(
        'json',
        data_files=train_file,
        split=datasets.Split.TRAIN,
    )
    valid_split = datasets.load_dataset(
        'json',
        data_files={datasets.Split.VALIDATION: valid_file},
        split=datasets.Split.VALIDATION,
    )
    test_split = datasets.load_dataset(
        'json',
        data_files={datasets.Split.TEST: test_file},
        split=datasets.Split.TEST,
    )

    splits = {'train': train_split, 'val': valid_split, 'test': test_split}
    return datasets.DatasetDict(**splits)
You can’t perform that action at this time.
0 commit comments