Skip to content

Commit 2adb47e

Browse files
feat: enhance completeness and validation reporting for datasets
1 parent ac6ffbb commit 2adb47e

File tree

5 files changed

+533
-29
lines changed

5 files changed

+533
-29
lines changed

internal/completeness/completeness.go

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@ type Report struct {
1414

1515
MissingRequired []metadata.Key
1616
MissingOptional []metadata.Key
17+
18+
// Dataset-specific tracking
19+
DatasetReports map[string]DatasetReport // key is dataset name/ref
20+
}
21+
22+
type DatasetReport struct {
23+
DatasetRef string // Reference to the dataset
24+
25+
Score float64 // 0..1
26+
Passed int
27+
Total int
28+
29+
MissingRequired []metadata.DatasetKey
30+
MissingOptional []metadata.DatasetKey
1731
}
1832

1933
func Check(bom *cdx.BOM) Report {
@@ -25,10 +39,27 @@ func Check(bom *cdx.BOM) Report {
2539
missingOpt []metadata.Key
2640
)
2741

42+
// Check if datasets are referenced in model
43+
datasetsReferenced := hasDatasetsReferenced(bom)
44+
2845
for _, spec := range metadata.Registry() {
2946
if spec.Weight <= 0 {
3047
continue
3148
}
49+
50+
// No datasets referenced: count the datasets field as missing and move on to the next field
51+
if spec.Key == metadata.ModelCardModelParametersDatasets && !datasetsReferenced {
52+
// Only count as missing if no datasets are referenced
53+
total++
54+
max += spec.Weight
55+
if spec.Required {
56+
missingReq = append(missingReq, spec.Key)
57+
} else {
58+
missingOpt = append(missingOpt, spec.Key)
59+
}
60+
continue
61+
}
62+
3263
total++
3364
max += spec.Weight
3465

@@ -55,7 +86,92 @@ func Check(bom *cdx.BOM) Report {
5586
score = earned / max
5687
}
5788

58-
return Report{
89+
report := Report{
90+
Score: score,
91+
Passed: passed,
92+
Total: total,
93+
MissingRequired: missingReq,
94+
MissingOptional: missingOpt,
95+
DatasetReports: make(map[string]DatasetReport),
96+
}
97+
98+
// Check dataset components if they exist
99+
if bom.Components != nil && datasetsReferenced {
100+
for _, comp := range *bom.Components {
101+
if comp.Type == cdx.ComponentTypeData {
102+
dsReport := CheckDataset(&comp)
103+
report.DatasetReports[comp.Name] = dsReport
104+
}
105+
}
106+
}
107+
108+
return report
109+
}
110+
111+
// hasDatasetsReferenced checks if the model references any datasets
112+
func hasDatasetsReferenced(bom *cdx.BOM) bool {
113+
if bom == nil || bom.Metadata == nil || bom.Metadata.Component == nil {
114+
return false
115+
}
116+
comp := bom.Metadata.Component
117+
if comp.ModelCard == nil || comp.ModelCard.ModelParameters == nil {
118+
return false
119+
}
120+
mp := comp.ModelCard.ModelParameters
121+
if mp.Datasets == nil || len(*mp.Datasets) == 0 {
122+
return false
123+
}
124+
// Check if any dataset ref is non-empty
125+
for _, ds := range *mp.Datasets {
126+
if ds.Ref != "" {
127+
return true
128+
}
129+
}
130+
return false
131+
}
132+
133+
// CheckDataset checks completeness of a single dataset component
134+
func CheckDataset(comp *cdx.Component) DatasetReport {
135+
var (
136+
earned, max float64
137+
passed int
138+
total int
139+
missingReq []metadata.DatasetKey
140+
missingOpt []metadata.DatasetKey
141+
)
142+
143+
for _, spec := range metadata.DatasetRegistry() {
144+
if spec.Weight <= 0 {
145+
continue
146+
}
147+
total++
148+
max += spec.Weight
149+
150+
ok := false
151+
if spec.Present != nil {
152+
ok = spec.Present(comp)
153+
}
154+
155+
if ok {
156+
passed++
157+
earned += spec.Weight
158+
continue
159+
}
160+
161+
if spec.Required {
162+
missingReq = append(missingReq, spec.Key)
163+
} else {
164+
missingOpt = append(missingOpt, spec.Key)
165+
}
166+
}
167+
168+
score := 0.0
169+
if max > 0 {
170+
score = earned / max
171+
}
172+
173+
return DatasetReport{
174+
DatasetRef: comp.Name,
59175
Score: score,
60176
Passed: passed,
61177
Total: total,

internal/completeness/report.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,31 @@ import (
99
// PrintReport writes the report to the configured logger writer.
1010
// If no logger writer is configured, it produces no output.
1111
func PrintReport(r Report) {
12-
logf("score=%.1f%% (%d/%d)", r.Score*100, r.Passed, r.Total)
12+
logf("Model score=%.1f%% (%d/%d)", r.Score*100, r.Passed, r.Total)
1313

1414
if len(r.MissingRequired) > 0 {
1515
logf("missing required: %s", joinKeys(r.MissingRequired))
1616
}
1717
if len(r.MissingOptional) > 0 {
1818
logf("missing optional: %s", joinKeys(r.MissingOptional))
1919
}
20+
21+
// Print dataset reports if any
22+
if len(r.DatasetReports) > 0 {
23+
logf("")
24+
logf("Dataset Components:")
25+
for dsName, dsReport := range r.DatasetReports {
26+
logf(" %s: score=%.1f%% (%d/%d)", dsName, dsReport.Score*100, dsReport.Passed, dsReport.Total)
27+
if len(dsReport.MissingRequired) > 0 {
28+
logf(" missing required: %s", joinDatasetKeys(dsReport.MissingRequired))
29+
}
30+
if len(dsReport.MissingOptional) > 0 {
31+
logf(" missing optional: %s", joinDatasetKeys(dsReport.MissingOptional))
32+
}
33+
}
34+
}
2035
}
36+
2137
func joinKeys(keys []metadata.Key) string {
2238
if len(keys) == 0 {
2339
return ""
@@ -31,3 +47,17 @@ func joinKeys(keys []metadata.Key) string {
3147
}
3248
return b.String()
3349
}
50+
51+
func joinDatasetKeys(keys []metadata.DatasetKey) string {
52+
if len(keys) == 0 {
53+
return ""
54+
}
55+
var b strings.Builder
56+
for i, k := range keys {
57+
if i > 0 {
58+
b.WriteString(", ")
59+
}
60+
b.WriteString(k.String())
61+
}
62+
return b.String()
63+
}

0 commit comments

Comments
 (0)