Skip to content

Commit 0a505af

Browse files
committed Aug 12, 2022
add iwslt14_de_en data
1 parent 9658f04 commit 0a505af

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed
 

‎iwslt14_de_en/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .load import load_dataset
2+
__all__ = ['load_dataset']

‎iwslt14_de_en/load.py

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import datasets
2+
import os
3+
4+
5+
def load_dataset(**kargs):
    """Load the cached IWSLT14 de-en data as a ``datasets.DatasetDict``.

    Three files are read from a fixed cache directory through the ``'json'``
    loader of ``datasets.load_dataset``: ``train.de_en``, ``valid.de_en`` and
    ``test.de_en``.

    Args:
        **kargs: Accepted by the signature but not read anywhere in the body.

    Returns:
        A ``datasets.DatasetDict`` with the keys ``train``, ``val`` and
        ``test`` (note that the validation split is stored under ``val``).
    """
    cache_root = '/cognitive_comp/common_data/translation/iwslt14.de-en/'
    train_file = os.path.join(cache_root, "train.de_en")
    valid_file = os.path.join(cache_root, "valid.de_en")
    test_file = os.path.join(cache_root, "test.de_en")

    # The training file is handed over as a bare path, while the validation
    # and test files are keyed by the split they are requested under.
    train_split = datasets.load_dataset(
        'json',
        data_files=train_file,
        split=datasets.Split.TRAIN,
    )
    valid_split = datasets.load_dataset(
        'json',
        data_files={datasets.Split.VALIDATION: valid_file},
        split=datasets.Split.VALIDATION,
    )
    test_split = datasets.load_dataset(
        'json',
        data_files={datasets.Split.TEST: test_file},
        split=datasets.Split.TEST,
    )

    return datasets.DatasetDict({
        'train': train_split,
        'val': valid_split,
        'test': test_split,
    })

0 commit comments

Comments
 (0)
Please sign in to comment.