Skip to content

Commit ac6ffbb

Browse files
feat: update dummy dataset fetchers
1 parent 20f1878 commit ac6ffbb

File tree

2 files changed

+30
-8
lines changed

2 files changed

+30
-8
lines changed

internal/fetcher/dummy_dataset_api_fetcher.go

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,19 @@ func (f *DummyDatasetAPIFetcher) Fetch(ctx context.Context, datasetID string) (*
1212
return &DatasetAPIResponse{
1313
ID: datasetID,
1414
Author: "huggingface",
15-
Tags: []string{"dataset", "benchmark"},
15+
SHA: "abc123def456789012345678901234567890abcd",
16+
LastMod: "2024-01-15T10:30:00.000Z",
17+
CreatedAt: "2023-06-01T08:00:00.000Z",
18+
UsedStorage: 1024000,
19+
Tags: []string{"dataset", "benchmark", "text-classification"},
1620
Description: "Dummy dataset for testing: " + datasetID,
1721
Downloads: 100000,
1822
Likes: 500,
1923
CardData: map[string]any{
20-
"language": "en",
21-
"license": "cc0-1.0",
24+
"language": "en",
25+
"license": "cc0-1.0",
26+
"task_categories": []interface{}{"text-classification", "text-generation"},
27+
"tags": []interface{}{"sentiment-analysis", "benchmark"},
2228
},
2329
}, nil
2430
}

internal/fetcher/dummy_dataset_readme_fetcher.go

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,26 @@ func (f *DummyDatasetReadmeFetcher) Fetch(ctx context.Context, datasetID string)
1818
License: "cc0-1.0",
1919
Tags: []string{"dataset", "test"},
2020
Language: []string{"en"},
21-
AnnotationCreators: []string{"huggingface"},
22-
CuratedBy: "Dummy Curator",
23-
FundedBy: "Test Foundation",
24-
SharedBy: "Test Team",
25-
DatasetDescription: "A dummy dataset for testing dataset component building",
21+
AnnotationCreators: []string{"Test Annotator", "Secondary Annotator"},
22+
Configs: []DatasetConfig{
23+
{
24+
Name: "default",
25+
DataFiles: []DatasetDataFile{
26+
{Split: "train", Path: "data/train.csv"},
27+
{Split: "test", Path: "data/test.csv"},
28+
},
29+
},
30+
},
31+
DatasetDescription: "A dummy dataset for testing dataset component building with comprehensive metadata",
32+
CuratedBy: "Dummy Curator",
33+
FundedBy: "Test Foundation",
34+
SharedBy: "Test Team",
35+
RepositoryURL: "https://huggingface.co/datasets/" + datasetID,
36+
PaperURL: "https://arxiv.org/abs/2401.12345",
37+
DemoURL: "https://huggingface.co/spaces/demo/" + datasetID,
38+
OutOfScopeUse: "This dataset should not be used for production systems without proper validation",
39+
PersonalSensitiveInfo: "This dataset may contain synthetic personal information for testing purposes",
40+
BiasRisksLimitations: "Dataset may contain inherent biases from the synthetic generation process",
41+
DatasetCardContact: "[email protected]",
2642
}, nil
2743
}

0 commit comments

Comments
 (0)