@@ -297,13 +297,17 @@ func DatasetRegistry() []DatasetFieldSpec {
297297 Required : false ,
298298 Sources : []func (DatasetSource ) (any , bool ){
299299 func (src DatasetSource ) (any , bool ) {
300- if src .Readme == nil {
301- return nil , false
300+ // First try API author (authors[0])
301+ if src .HF != nil && strings .TrimSpace (src .HF .Author ) != "" {
302+ return strings .TrimSpace (src .HF .Author ), true
302303 }
303- if len (src .Readme .AnnotationCreators ) == 0 {
304- return nil , false
304+ // Fallback to first AnnotationCreator from README (authors[1])
305+ if src .Readme != nil && len (src .Readme .AnnotationCreators ) > 0 {
306+ if trimmed := strings .TrimSpace (src .Readme .AnnotationCreators [0 ]); trimmed != "" {
307+ return trimmed , true
308+ }
305309 }
306- return strings . TrimSpace ( src . Readme . AnnotationCreators [ 0 ]), true
310+ return nil , false
307311 },
308312 },
309313 Parse : func (value string ) (any , error ) {
@@ -332,19 +336,98 @@ func DatasetRegistry() []DatasetFieldSpec {
332336 return comp != nil && comp .Manufacturer != nil && strings .TrimSpace (comp .Manufacturer .Name ) != ""
333337 },
334338 },
339+ {
340+ Key : DatasetAuthors ,
341+ Weight : 0.6 ,
342+ Required : false ,
343+ Sources : []func (DatasetSource ) (any , bool ){
344+ func (src DatasetSource ) (any , bool ) {
345+ var allAuthors []string
346+
347+ // First, add API author if available
348+ if src .HF != nil && strings .TrimSpace (src .HF .Author ) != "" {
349+ allAuthors = append (allAuthors , strings .TrimSpace (src .HF .Author ))
350+ }
351+
352+ // Then, add annotation creators from README
353+ if src .Readme != nil && len (src .Readme .AnnotationCreators ) > 0 {
354+ for _ , creator := range src .Readme .AnnotationCreators {
355+ if trimmed := strings .TrimSpace (creator ); trimmed != "" {
356+ allAuthors = append (allAuthors , trimmed )
357+ }
358+ }
359+ }
360+
361+ if len (allAuthors ) == 0 {
362+ return nil , false
363+ }
364+ return allAuthors , true
365+ },
366+ },
367+ Parse : func (value string ) (any , error ) {
368+ parts := strings .Split (value , "," )
369+ authors := normalizeStrings (parts )
370+ return authors , nil
371+ },
372+ Apply : func (tgt DatasetTarget , value any ) error {
373+ input , ok := value .(applyInput )
374+ if ! ok {
375+ return fmt .Errorf ("invalid input for %s" , DatasetAuthors )
376+ }
377+ if tgt .Component == nil {
378+ return fmt .Errorf ("component is nil" )
379+ }
380+ var authors []cdx.OrganizationalContact
381+ switch v := input .Value .(type ) {
382+ case []string :
383+ for _ , authorName := range v {
384+ if trimmed := strings .TrimSpace (authorName ); trimmed != "" {
385+ authors = append (authors , cdx.OrganizationalContact {
386+ Name : trimmed ,
387+ })
388+ }
389+ }
390+ case string :
391+ if trimmed := strings .TrimSpace (v ); trimmed != "" {
392+ authors = append (authors , cdx.OrganizationalContact {
393+ Name : trimmed ,
394+ })
395+ }
396+ }
397+ if len (authors ) == 0 {
398+ return fmt .Errorf ("authors value is empty" )
399+ }
400+ if ! input .Force && tgt .Component .Authors != nil && len (* tgt .Component .Authors ) > 0 {
401+ return nil
402+ }
403+ tgt .Component .Authors = & authors
404+ return nil
405+ },
406+ Present : func (comp * cdx.Component ) bool {
407+ return comp != nil && comp .Authors != nil && len (* comp .Authors ) > 0
408+ },
409+ },
335410 {
336411 Key : DatasetGroup ,
337412 Weight : 0.4 ,
338413 Required : false ,
339414 Sources : []func (DatasetSource ) (any , bool ){
340415 func (src DatasetSource ) (any , bool ) {
341- if src .Readme == nil {
342- return nil , false
416+ // Extract group from DatasetID (part before /)
417+ var datasetID string
418+ if src .HF != nil && strings .TrimSpace (src .HF .ID ) != "" {
419+ datasetID = strings .TrimSpace (src .HF .ID )
420+ } else {
421+ datasetID = strings .TrimSpace (src .DatasetID )
343422 }
344- if len ( src . Readme . AnnotationCreators ) < 2 {
423+ if datasetID == "" {
345424 return nil , false
346425 }
347- return strings .TrimSpace (src .Readme .AnnotationCreators [1 ]), true
426+ parts := strings .SplitN (datasetID , "/" , 2 )
427+ if len (parts ) > 0 && strings .TrimSpace (parts [0 ]) != "" {
428+ return strings .TrimSpace (parts [0 ]), true
429+ }
430+ return nil , false
348431 },
349432 },
350433 Parse : func (value string ) (any , error ) {
@@ -669,11 +752,11 @@ func DatasetRegistry() []DatasetFieldSpec {
669752 return fmt .Errorf ("component is nil" )
670753 }
671754 createdAt , _ := input .Value .(string )
672- setProperty (tgt .Component , "createdAt" , strings .TrimSpace (createdAt ))
755+ setProperty (tgt .Component , "huggingface: createdAt" , strings .TrimSpace (createdAt ))
673756 return nil
674757 },
675758 Present : func (comp * cdx.Component ) bool {
676- return hasProperty (comp , "createdAt" )
759+ return hasProperty (comp , "huggingface: createdAt" )
677760 },
678761 },
679762 {
@@ -700,11 +783,11 @@ func DatasetRegistry() []DatasetFieldSpec {
700783 return fmt .Errorf ("component is nil" )
701784 }
702785 usedStorage , _ := input .Value .(string )
703- setProperty (tgt .Component , "usedStorage" , strings .TrimSpace (usedStorage ))
786+ setProperty (tgt .Component , "huggingface: usedStorage" , strings .TrimSpace (usedStorage ))
704787 return nil
705788 },
706789 Present : func (comp * cdx.Component ) bool {
707- return hasProperty (comp , "usedStorage" )
790+ return hasProperty (comp , "huggingface: usedStorage" )
708791 },
709792 },
710793 {
@@ -793,11 +876,11 @@ func DatasetRegistry() []DatasetFieldSpec {
793876 if tgt .Component == nil {
794877 return fmt .Errorf ("component is nil" )
795878 }
796- setProperty (tgt .Component , "contact " , contact )
879+ setProperty (tgt .Component , "huggingface:datasetContact " , contact )
797880 return nil
798881 },
799882 Present : func (comp * cdx.Component ) bool {
800- return hasProperty (comp , "contact " )
883+ return hasProperty (comp , "huggingface:datasetContact " )
801884 },
802885 },
803886 }
0 commit comments