|
| 1 | +package metadata |
| 2 | + |
| 3 | +import ( |
| 4 | + "github.com/idlab-discover/AIBoMGen-cli/internal/fetcher" |
| 5 | + "github.com/idlab-discover/AIBoMGen-cli/internal/scanner" |
| 6 | + |
| 7 | + cdx "github.com/CycloneDX/cyclonedx-go" |
| 8 | +) |
| 9 | + |
// Key names a CycloneDX field (or pseudo-field) that is populated and/or
// checked. Values are dotted path strings; see the Key constants declared in
// this file.
type Key string

// String returns the key's underlying string value, which makes Key satisfy
// fmt.Stringer.
func (k Key) String() string {
	return string(k)
}
| 14 | + |
| 15 | +const ( |
| 16 | + // BOM.metadata.component.* (MODEL) |
| 17 | + ComponentName Key = "BOM.metadata.component.name" |
| 18 | + ComponentExternalReferences Key = "BOM.metadata.component.externalReferences" |
| 19 | + ComponentTags Key = "BOM.metadata.component.tags" |
| 20 | + ComponentLicenses Key = "BOM.metadata.component.licenses" |
| 21 | + ComponentHashes Key = "BOM.metadata.component.hashes" |
| 22 | + ComponentManufacturer Key = "BOM.metadata.component.manufacturer" |
| 23 | + ComponentGroup Key = "BOM.metadata.component.group" |
| 24 | + |
| 25 | + // Component-level extra properties (stored later as CycloneDX Component.Properties) |
| 26 | + ComponentPropertiesHuggingFaceLastModified Key = "BOM.metadata.component.properties.huggingface:lastModified" |
| 27 | + ComponentPropertiesHuggingFaceCreatedAt Key = "BOM.metadata.component.properties.huggingface:createdAt" |
| 28 | + ComponentPropertiesHuggingFaceLanguage Key = "BOM.metadata.component.properties.huggingface:language" |
| 29 | + ComponentPropertiesHuggingFaceUsedStorage Key = "BOM.metadata.component.properties.huggingface:usedStorage" |
| 30 | + ComponentPropertiesHuggingFacePrivate Key = "BOM.metadata.component.properties.huggingface:private" |
| 31 | + ComponentPropertiesHuggingFaceLibraryName Key = "BOM.metadata.component.properties.huggingface:libraryName" |
| 32 | + ComponentPropertiesHuggingFaceDownloads Key = "BOM.metadata.component.properties.huggingface:downloads" |
| 33 | + ComponentPropertiesHuggingFaceLikes Key = "BOM.metadata.component.properties.huggingface:likes" |
| 34 | + ComponentPropertiesHuggingFaceBaseModel Key = "BOM.metadata.component.properties.huggingface:baseModel" |
| 35 | + ComponentPropertiesHuggingFaceContact Key = "BOM.metadata.component.properties.huggingface:modelCardContact" |
| 36 | + |
| 37 | + // BOM.metadata.component.modelCard.* (MODEL CARD) |
| 38 | + ModelCardModelParametersTask Key = "BOM.metadata.component.modelCard.modelParameters.task" |
| 39 | + ModelCardModelParametersArchitectureFamily Key = "BOM.metadata.component.modelCard.modelParameters.architectureFamily" |
| 40 | + ModelCardModelParametersModelArchitecture Key = "BOM.metadata.component.modelCard.modelParameters.modelArchitecture" |
| 41 | + ModelCardModelParametersDatasets Key = "BOM.metadata.component.modelCard.modelParameters.datasets" |
| 42 | + ModelCardConsiderationsUseCases Key = "BOM.metadata.component.modelCard.considerations.useCases" |
| 43 | + ModelCardConsiderationsTechnicalLimitations Key = "BOM.metadata.component.modelCard.considerations.technicalLimitations" |
| 44 | + ModelCardConsiderationsEthicalConsiderations Key = "BOM.metadata.component.modelCard.considerations.ethicalConsiderations" |
| 45 | + ModelCardQuantitativeAnalysisPerformanceMetrics Key = "BOM.metadata.component.modelCard.quantitativeAnalysis.performanceMetrics" |
| 46 | + ModelCardConsiderationsEnvironmentalConsiderationsProperties Key = "BOM.metadata.component.modelCard.considerations.environmentalConsiderations.properties" |
| 47 | +) |
| 48 | + |
// DatasetKey names a dataset-specific CycloneDX field. Values are dotted path
// strings; see the DatasetKey constants declared in this file.
type DatasetKey string

// String returns the key's underlying string value, which makes DatasetKey
// satisfy fmt.Stringer.
func (k DatasetKey) String() string {
	return string(k)
}
| 53 | + |
| 54 | +const ( |
| 55 | + // BOM.components[DATA].* (DATASET) |
| 56 | + DatasetName DatasetKey = "BOM.components[DATA].name" |
| 57 | + DatasetExternalReferences DatasetKey = "BOM.components[DATA].externalReferences" |
| 58 | + DatasetTags DatasetKey = "BOM.components[DATA].tags" |
| 59 | + DatasetLicenses DatasetKey = "BOM.components[DATA].licenses" |
| 60 | + DatasetDescription DatasetKey = "BOM.components[DATA].data.description" |
| 61 | + DatasetManufacturer DatasetKey = "BOM.components[DATA].manufacturer" |
| 62 | + DatasetAuthor DatasetKey = "BOM.components[DATA].author" |
| 63 | + DatasetGroup DatasetKey = "BOM.components[DATA].group" |
| 64 | + DatasetContents DatasetKey = "BOM.components[DATA].data.contents.attachments" |
| 65 | + DatasetSensitiveData DatasetKey = "BOM.components[DATA].data.sensitiveData" |
| 66 | + DatasetClassification DatasetKey = "BOM.components[DATA].data.classification" |
| 67 | + DatasetGovernance DatasetKey = "BOM.components[DATA].data.governance" |
| 68 | + DatasetHashes DatasetKey = "BOM.components[DATA].hashes" |
| 69 | + DatasetContact DatasetKey = "BOM.components[DATA].properties.contact" |
| 70 | + DatasetCreatedAt DatasetKey = "BOM.components[DATA].properties.createdAt" |
| 71 | + DatasetUsedStorage DatasetKey = "BOM.components[DATA].properties.usedStorage" |
| 72 | + DatasetLastModified DatasetKey = "BOM.components[DATA].tags.lastModified" |
| 73 | +) |
| 74 | + |
| 75 | +// Source is everything FieldSpecs can read from. |
| 76 | +type Source struct { |
| 77 | + ModelID string |
| 78 | + Scan scanner.Discovery |
| 79 | + HF *fetcher.ModelAPIResponse |
| 80 | + Readme *fetcher.ModelReadmeCard |
| 81 | +} |
| 82 | + |
| 83 | +// Target is everything FieldSpecs are allowed to mutate. |
| 84 | +type Target struct { |
| 85 | + BOM *cdx.BOM |
| 86 | + Component *cdx.Component |
| 87 | + ModelCard *cdx.MLModelCard |
| 88 | + |
| 89 | + // Options (builder can set these when calling Apply) |
| 90 | + IncludeEvidenceProperties bool |
| 91 | + HuggingFaceBaseURL string |
| 92 | +} |
| 93 | + |
| 94 | +// DatasetSource mirrors Source but for datasets |
| 95 | +type DatasetSource struct { |
| 96 | + DatasetID string |
| 97 | + Scan scanner.Discovery |
| 98 | + HF *fetcher.DatasetAPIResponse |
| 99 | + Readme *fetcher.DatasetReadmeCard |
| 100 | +} |
| 101 | + |
| 102 | +// DatasetTarget is the dataset component being built |
| 103 | +type DatasetTarget struct { |
| 104 | + Component *cdx.Component |
| 105 | + |
| 106 | + // Options |
| 107 | + IncludeEvidenceProperties bool |
| 108 | + HuggingFaceBaseURL string |
| 109 | +} |
| 110 | + |
| 111 | +// FieldSpec is a first-class definition of a field: |
| 112 | +// - how it contributes to completeness |
| 113 | +// - how it is populated into the BOM |
| 114 | +// - how its presence is detected |
| 115 | +// - how user-provided values are set |
| 116 | +type FieldSpec struct { |
| 117 | + Key Key |
| 118 | + Weight float64 |
| 119 | + Required bool |
| 120 | + |
| 121 | + Sources []func(Source) (any, bool) |
| 122 | + Parse func(string) (any, error) |
| 123 | + Apply func(Target, any) error |
| 124 | + Present func(*cdx.BOM) bool |
| 125 | +} |
| 126 | + |
| 127 | +// DatasetFieldSpec is the dataset analog of FieldSpec |
| 128 | +type DatasetFieldSpec struct { |
| 129 | + Key DatasetKey |
| 130 | + Weight float64 |
| 131 | + Required bool |
| 132 | + |
| 133 | + Sources []func(DatasetSource) (any, bool) |
| 134 | + Parse func(string) (any, error) |
| 135 | + Apply func(DatasetTarget, any) error |
| 136 | + Present func(comp *cdx.Component) bool |
| 137 | +} |
| 138 | + |
| 139 | +// Registry is the central registry of all known FieldSpecs. |
| 140 | +// Each spec defines how to apply itself and how to check presence. |
| 141 | +// The registry is used by the BOM builder and completeness checker. |
| 142 | +// It is the single source of truth for what fields we care about. |
| 143 | +func Registry() []FieldSpec { |
| 144 | + specs := make([]FieldSpec, 0, 32) |
| 145 | + specs = append(specs, componentFields()...) |
| 146 | + specs = append(specs, evidenceFields()...) |
| 147 | + specs = append(specs, hfPropFields()...) |
| 148 | + specs = append(specs, modelCardFields()...) |
| 149 | + return specs |
| 150 | +} |
0 commit comments