Skip to content

Commit cdbfe13

Browse files
Refactor: first step in getting rid of the fieldspecs monolith file
1 parent dbee386 commit cdbfe13

File tree

15 files changed

+2671
-2250
lines changed

15 files changed

+2671
-2250
lines changed

internal/builder/bom_builder.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,7 @@ func (b BOMBuilder) Build(ctx BuildContext) (*cdx.BOM, error) {
4040
}
4141

4242
for _, spec := range metadata.Registry() {
43-
if spec.Apply != nil {
44-
spec.Apply(src, tgt)
45-
}
43+
metadata.ApplyFromSources(spec, src, tgt)
4644
}
4745

4846
logf(ctx.ModelID, "build ok")
@@ -69,9 +67,7 @@ func (b BOMBuilder) BuildDataset(ctx DatasetBuildContext) (*cdx.Component, error
6967
}
7068

7169
for _, spec := range metadata.DatasetRegistry() {
72-
if spec.Apply != nil {
73-
spec.Apply(src, tgt)
74-
}
70+
metadata.ApplyDatasetFromSources(spec, src, tgt)
7571
}
7672

7773
logf(ctx.DatasetID, "build dataset ok")

internal/completeness/completeness_test.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,7 @@ func buildFullyPopulatedBOMForRegistry(t *testing.T) *cdx.BOM {
9191
}
9292

9393
for _, spec := range metadata.Registry() {
94-
if spec.Apply != nil {
95-
spec.Apply(src, tgt)
96-
}
94+
metadata.ApplyFromSources(spec, src, tgt)
9795
}
9896

9997
return bom

internal/enricher/enricher.go

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,8 @@ func (e *Enricher) enrichModel(bom *cdx.BOM, modelID string, hfAPI *fetcher.Mode
185185
logf(modelID, "failed to apply %s: %v", spec.Key, err)
186186
continue
187187
}
188-
// Only track the change if it was successfully applied
189-
if spec.SetUserValue != nil {
190-
changes[spec.Key] = formatValue(value)
191-
}
188+
// Track the change if it was successfully applied
189+
changes[spec.Key] = formatValue(value)
192190
}
193191
}
194192

@@ -260,10 +258,8 @@ func (e *Enricher) enrichDataset(bom *cdx.BOM, comp *cdx.Component, configViper
260258
logf(datasetID, "failed to apply %s: %v", spec.Key, err)
261259
continue
262260
}
263-
// Only track the change if it was successfully applied
264-
if spec.SetUserValue != nil {
265-
changes[spec.Key] = formatValue(value)
266-
}
261+
// Track the change if it was successfully applied
262+
changes[spec.Key] = formatValue(value)
267263
}
268264
}
269265

@@ -352,12 +348,10 @@ func (e *Enricher) applyRefetchedMetadata(bom *cdx.BOM, modelID string, hfAPI *f
352348
totalSpecs := 0
353349
specsWithWeight := 0
354350
for _, spec := range metadata.Registry() {
355-
if spec.Apply != nil {
356-
spec.Apply(src, tgt)
357-
totalSpecs++
358-
if spec.Weight > 0 {
359-
specsWithWeight++
360-
}
351+
metadata.ApplyFromSources(spec, src, tgt)
352+
totalSpecs++
353+
if spec.Weight > 0 {
354+
specsWithWeight++
361355
}
362356
}
363357

@@ -456,17 +450,11 @@ func (e *Enricher) applyValue(spec metadata.FieldSpec, src *metadata.Source, tgt
456450
strValue := fmt.Sprintf("%v", value)
457451

458452
// Parse and apply the user-provided value via metadata.ApplyUserValue
459-
if spec.SetUserValue != nil {
460-
err := spec.SetUserValue(strValue, *tgt)
461-
if err != nil {
462-
return fmt.Errorf("failed to set user value for %s: %w", spec.Key, err)
463-
}
464-
logf(src.ModelID, "applied user value for %s", spec.Key)
465-
return nil
453+
err := metadata.ApplyUserValue(spec, strValue, *tgt)
454+
if err != nil {
455+
return fmt.Errorf("failed to set user value for %s: %w", spec.Key, err)
466456
}
467-
468-
// Fallback: if no SetUserValue, log a warning
469-
logf(src.ModelID, "warning: no SetUserValue function for %s, value not applied", spec.Key)
457+
logf(src.ModelID, "applied user value for %s", spec.Key)
470458
return nil
471459
}
472460

@@ -706,15 +694,10 @@ func (e *Enricher) getDatasetSuggestions(spec metadata.DatasetFieldSpec) []strin
706694
func (e *Enricher) applyDatasetValue(spec metadata.DatasetFieldSpec, src *metadata.DatasetSource, tgt *metadata.DatasetTarget, value interface{}) error {
707695
strValue := fmt.Sprintf("%v", value)
708696

709-
if spec.SetUserValue != nil {
710-
err := spec.SetUserValue(strValue, *tgt)
711-
if err != nil {
712-
return fmt.Errorf("failed to set user value for %s: %w", spec.Key, err)
713-
}
714-
logf(src.DatasetID, "applied user value for %s", spec.Key)
715-
return nil
697+
err := metadata.ApplyDatasetUserValue(spec, strValue, *tgt)
698+
if err != nil {
699+
return fmt.Errorf("failed to set user value for %s: %w", spec.Key, err)
716700
}
717-
718-
logf(src.DatasetID, "warning: no SetUserValue function for %s, value not applied", spec.Key)
701+
logf(src.DatasetID, "applied user value for %s", spec.Key)
719702
return nil
720703
}

internal/metadata/apply.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package metadata
2+
3+
import "fmt"
4+
5+
// applyInput is the payload the helpers in this file pass as the second
// (any-typed) argument of a spec's Apply function.
type applyInput struct {
	// Value is the data to apply: a value returned by one of the spec's
	// sources, or the result of spec.Parse for user-provided input.
	Value any
	// Force is false for values read from sources and true for
	// user-provided values.
	// NOTE(review): presumably lets Apply overwrite existing data — confirm
	// against the Apply implementations.
	Force bool
}
9+
10+
// ApplyFromSources applies the first available source value using spec.Apply.
11+
func ApplyFromSources(spec FieldSpec, src Source, tgt Target) {
12+
if spec.Apply == nil || len(spec.Sources) == 0 {
13+
return
14+
}
15+
for _, get := range spec.Sources {
16+
if get == nil {
17+
continue
18+
}
19+
value, ok := get(src)
20+
if !ok {
21+
continue
22+
}
23+
_ = spec.Apply(tgt, applyInput{Value: value, Force: false})
24+
return
25+
}
26+
}
27+
28+
// ApplyUserValue parses and applies a user-provided value using spec.Parse and spec.Apply.
29+
func ApplyUserValue(spec FieldSpec, value string, tgt Target) error {
30+
if spec.Parse == nil || spec.Apply == nil {
31+
return fmt.Errorf("spec missing Parse/Apply for %s", spec.Key)
32+
}
33+
parsed, err := spec.Parse(value)
34+
if err != nil {
35+
return err
36+
}
37+
return spec.Apply(tgt, applyInput{Value: parsed, Force: true})
38+
}
39+
40+
// ApplyDatasetFromSources applies the first available dataset source value.
41+
func ApplyDatasetFromSources(spec DatasetFieldSpec, src DatasetSource, tgt DatasetTarget) {
42+
if spec.Apply == nil || len(spec.Sources) == 0 {
43+
return
44+
}
45+
for _, get := range spec.Sources {
46+
if get == nil {
47+
continue
48+
}
49+
value, ok := get(src)
50+
if !ok {
51+
continue
52+
}
53+
_ = spec.Apply(tgt, applyInput{Value: value, Force: false})
54+
return
55+
}
56+
}
57+
58+
// ApplyDatasetUserValue parses and applies a dataset user value.
59+
func ApplyDatasetUserValue(spec DatasetFieldSpec, value string, tgt DatasetTarget) error {
60+
if spec.Parse == nil || spec.Apply == nil {
61+
return fmt.Errorf("spec missing Parse/Apply for %s", spec.Key)
62+
}
63+
parsed, err := spec.Parse(value)
64+
if err != nil {
65+
return err
66+
}
67+
return spec.Apply(tgt, applyInput{Value: parsed, Force: true})
68+
}

internal/metadata/core.go

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package metadata
2+
3+
import (
4+
"github.com/idlab-discover/AIBoMGen-cli/internal/fetcher"
5+
"github.com/idlab-discover/AIBoMGen-cli/internal/scanner"
6+
7+
cdx "github.com/CycloneDX/cyclonedx-go"
8+
)
9+
10+
// Key names a CycloneDX field (or pseudo-field) that we want to populate
// and check.
type Key string

// String returns the key as a plain string.
func (k Key) String() string {
	return string(k)
}
14+
15+
const (
16+
// BOM.metadata.component.* (MODEL)
17+
ComponentName Key = "BOM.metadata.component.name"
18+
ComponentExternalReferences Key = "BOM.metadata.component.externalReferences"
19+
ComponentTags Key = "BOM.metadata.component.tags"
20+
ComponentLicenses Key = "BOM.metadata.component.licenses"
21+
ComponentHashes Key = "BOM.metadata.component.hashes"
22+
ComponentManufacturer Key = "BOM.metadata.component.manufacturer"
23+
ComponentGroup Key = "BOM.metadata.component.group"
24+
25+
// Component-level extra properties (stored later as CycloneDX Component.Properties)
26+
ComponentPropertiesHuggingFaceLastModified Key = "BOM.metadata.component.properties.huggingface:lastModified"
27+
ComponentPropertiesHuggingFaceCreatedAt Key = "BOM.metadata.component.properties.huggingface:createdAt"
28+
ComponentPropertiesHuggingFaceLanguage Key = "BOM.metadata.component.properties.huggingface:language"
29+
ComponentPropertiesHuggingFaceUsedStorage Key = "BOM.metadata.component.properties.huggingface:usedStorage"
30+
ComponentPropertiesHuggingFacePrivate Key = "BOM.metadata.component.properties.huggingface:private"
31+
ComponentPropertiesHuggingFaceLibraryName Key = "BOM.metadata.component.properties.huggingface:libraryName"
32+
ComponentPropertiesHuggingFaceDownloads Key = "BOM.metadata.component.properties.huggingface:downloads"
33+
ComponentPropertiesHuggingFaceLikes Key = "BOM.metadata.component.properties.huggingface:likes"
34+
ComponentPropertiesHuggingFaceBaseModel Key = "BOM.metadata.component.properties.huggingface:baseModel"
35+
ComponentPropertiesHuggingFaceContact Key = "BOM.metadata.component.properties.huggingface:modelCardContact"
36+
37+
// BOM.metadata.component.modelCard.* (MODEL CARD)
38+
ModelCardModelParametersTask Key = "BOM.metadata.component.modelCard.modelParameters.task"
39+
ModelCardModelParametersArchitectureFamily Key = "BOM.metadata.component.modelCard.modelParameters.architectureFamily"
40+
ModelCardModelParametersModelArchitecture Key = "BOM.metadata.component.modelCard.modelParameters.modelArchitecture"
41+
ModelCardModelParametersDatasets Key = "BOM.metadata.component.modelCard.modelParameters.datasets"
42+
ModelCardConsiderationsUseCases Key = "BOM.metadata.component.modelCard.considerations.useCases"
43+
ModelCardConsiderationsTechnicalLimitations Key = "BOM.metadata.component.modelCard.considerations.technicalLimitations"
44+
ModelCardConsiderationsEthicalConsiderations Key = "BOM.metadata.component.modelCard.considerations.ethicalConsiderations"
45+
ModelCardQuantitativeAnalysisPerformanceMetrics Key = "BOM.metadata.component.modelCard.quantitativeAnalysis.performanceMetrics"
46+
ModelCardConsiderationsEnvironmentalConsiderationsProperties Key = "BOM.metadata.component.modelCard.considerations.environmentalConsiderations.properties"
47+
)
48+
49+
// DatasetKey names a dataset-specific CycloneDX field.
type DatasetKey string

// String returns the dataset key as a plain string.
func (k DatasetKey) String() string {
	return string(k)
}
53+
54+
const (
55+
// BOM.components[DATA].* (DATASET)
56+
DatasetName DatasetKey = "BOM.components[DATA].name"
57+
DatasetExternalReferences DatasetKey = "BOM.components[DATA].externalReferences"
58+
DatasetTags DatasetKey = "BOM.components[DATA].tags"
59+
DatasetLicenses DatasetKey = "BOM.components[DATA].licenses"
60+
DatasetDescription DatasetKey = "BOM.components[DATA].data.description"
61+
DatasetManufacturer DatasetKey = "BOM.components[DATA].manufacturer"
62+
DatasetAuthor DatasetKey = "BOM.components[DATA].author"
63+
DatasetGroup DatasetKey = "BOM.components[DATA].group"
64+
DatasetContents DatasetKey = "BOM.components[DATA].data.contents.attachments"
65+
DatasetSensitiveData DatasetKey = "BOM.components[DATA].data.sensitiveData"
66+
DatasetClassification DatasetKey = "BOM.components[DATA].data.classification"
67+
DatasetGovernance DatasetKey = "BOM.components[DATA].data.governance"
68+
DatasetHashes DatasetKey = "BOM.components[DATA].hashes"
69+
DatasetContact DatasetKey = "BOM.components[DATA].properties.contact"
70+
DatasetCreatedAt DatasetKey = "BOM.components[DATA].properties.createdAt"
71+
DatasetUsedStorage DatasetKey = "BOM.components[DATA].properties.usedStorage"
72+
DatasetLastModified DatasetKey = "BOM.components[DATA].tags.lastModified"
73+
)
74+
75+
// Source is everything FieldSpecs can read from.
76+
type Source struct {
77+
ModelID string
78+
Scan scanner.Discovery
79+
HF *fetcher.ModelAPIResponse
80+
Readme *fetcher.ModelReadmeCard
81+
}
82+
83+
// Target is everything FieldSpecs are allowed to mutate.
84+
type Target struct {
85+
BOM *cdx.BOM
86+
Component *cdx.Component
87+
ModelCard *cdx.MLModelCard
88+
89+
// Options (builder can set these when calling Apply)
90+
IncludeEvidenceProperties bool
91+
HuggingFaceBaseURL string
92+
}
93+
94+
// DatasetSource mirrors Source but for datasets
95+
type DatasetSource struct {
96+
DatasetID string
97+
Scan scanner.Discovery
98+
HF *fetcher.DatasetAPIResponse
99+
Readme *fetcher.DatasetReadmeCard
100+
}
101+
102+
// DatasetTarget is the dataset component being built
103+
type DatasetTarget struct {
104+
Component *cdx.Component
105+
106+
// Options
107+
IncludeEvidenceProperties bool
108+
HuggingFaceBaseURL string
109+
}
110+
111+
// FieldSpec is a first-class definition of a field:
112+
// - how it contributes to completeness
113+
// - how it is populated into the BOM
114+
// - how its presence is detected
115+
// - how user-provided values are set
116+
type FieldSpec struct {
117+
Key Key
118+
Weight float64
119+
Required bool
120+
121+
Sources []func(Source) (any, bool)
122+
Parse func(string) (any, error)
123+
Apply func(Target, any) error
124+
Present func(*cdx.BOM) bool
125+
}
126+
127+
// DatasetFieldSpec is the dataset analog of FieldSpec
128+
type DatasetFieldSpec struct {
129+
Key DatasetKey
130+
Weight float64
131+
Required bool
132+
133+
Sources []func(DatasetSource) (any, bool)
134+
Parse func(string) (any, error)
135+
Apply func(DatasetTarget, any) error
136+
Present func(comp *cdx.Component) bool
137+
}
138+
139+
// Registry is the central registry of all known FieldSpecs.
140+
// Each spec defines how to apply itself and how to check presence.
141+
// The registry is used by the BOM builder and completeness checker.
142+
// It is the single source of truth for what fields we care about.
143+
func Registry() []FieldSpec {
144+
specs := make([]FieldSpec, 0, 32)
145+
specs = append(specs, componentFields()...)
146+
specs = append(specs, evidenceFields()...)
147+
specs = append(specs, hfPropFields()...)
148+
specs = append(specs, modelCardFields()...)
149+
return specs
150+
}

0 commit comments

Comments
 (0)