3838#define GAUDI_DEVICE_NAME_LEN 10
3939
4040static pthread_mutex_t gaudi_init_mutex = PTHREAD_MUTEX_INITIALIZER ;
41+ /* One-time initialization state for ucs_gaudi_topo_ctx.lock:
     * gaudi_spinlock_once_flag is the pthread_once() control variable, and
     * gaudi_spinlock_init_status holds the status returned by
     * ucs_spinlock_init() so callers can check it after pthread_once(). */
42+ static pthread_once_t gaudi_spinlock_once_flag = PTHREAD_ONCE_INIT ;
43+ static ucs_status_t gaudi_spinlock_init_status = UCS_OK ;
4144
4245static const ucs_sys_dev_distance_t gaudi_fallback_node_distance =
4346 {.latency = 100e-9 , .bandwidth = 17e9 }; /* 100ns, 17 GB/s */
@@ -1272,12 +1275,12 @@ static ucs_status_t ucs_gaudi_build_assignment_balanced()
12721275 }
12731276
12741277 /* Count devices per NUMA node */
1275- for (i = 0 ; i < num_gaudi_devices ; ++ i ) {
1278+ for (i = 0 ; i < num_gaudi_devices ; i ++ ) {
12761279 numa_node = ucs_gaudi_get_validated_numa_node (
12771280 ucs_gaudi_topo_ctx .gaudi_devices [i ], num_numa_nodes );
12781281 gaudi_per_numa [numa_node ]++ ;
12791282 }
1280- for (i = 0 ; i < num_hnic_devices ; ++ i ) {
1283+ for (i = 0 ; i < num_hnic_devices ; i ++ ) {
12811284 numa_node = ucs_gaudi_get_validated_numa_node (
12821285 ucs_gaudi_topo_ctx .hnic_devices [i ], num_numa_nodes );
12831286 hnic_per_numa [numa_node ]++ ;
@@ -1539,7 +1542,7 @@ ucs_status_t ucs_gaudi_find_best_connection(const char *accel_name,
15391542 ucs_spin_lock (& ucs_gaudi_topo_ctx .lock );
15401543
15411544 /* Return cached balanced assignment instead of searching connections */
1542- for (i = 0 ; i < ucs_gaudi_topo_ctx .num_gaudi_devices ; ++ i ) {
1545+ for (i = 0 ; i < ucs_gaudi_topo_ctx .num_gaudi_devices ; i ++ ) {
15431546 if (!strcmp (accel_name , ucs_gaudi_topo_ctx .gaudi_devices_names [i ])) {
15441547 break ;
15451548 }
@@ -1686,7 +1689,7 @@ static void ucs_gaudi_get_memory_distance(ucs_sys_device_t device,
16861689 /* Sum NUMA distances for CPUs in affinity set */
16871690 num_cpus = ucs_numa_num_configured_cpus ();
16881691 total_distance = 0 ;
1689- for (cpu = 0 ; cpu < num_cpus ; ++ cpu ) {
1692+ for (cpu = 0 ; cpu < num_cpus ; cpu ++ ) {
16901693 if (!full_affinity && !CPU_ISSET (cpu , & thread_cpuset )) {
16911694 continue ;
16921695 }
@@ -1712,10 +1715,15 @@ static void ucs_gaudi_get_memory_distance(ucs_sys_device_t device,
17121715 distance -> latency = (total_distance / cpuset_size ) * 10e-9 ;
17131716}
17141717
1718+ /* Initialize ucs_gaudi_topo_ctx.lock exactly once.
       * Used as the init routine passed to pthread_once() together with
       * gaudi_spinlock_once_flag. The routine returns nothing, so the result
       * of ucs_spinlock_init() is stored in gaudi_spinlock_init_status, which
       * callers compare against UCS_OK after pthread_once() returns. */
1719+ static void ucs_gaudi_spinlock_once_init ()
1720+ {
1721+ gaudi_spinlock_init_status = ucs_spinlock_init (& ucs_gaudi_topo_ctx .lock , 0 );
1722+ }
1723+
17151724/* Initialization function */
17161725void ucs_gaudi_topo_init ()
17171726{
1718- ucs_status_t status ;
17191727 const char * disable ;
17201728
17211729 disable = getenv ("UCS_GAUDI_TOPO_DISABLE" );
@@ -1730,17 +1738,16 @@ void ucs_gaudi_topo_init()
17301738 return ;
17311739 }
17321740
1741+ /* Ensure spinlock exists even if lazy init is first */
1742+ pthread_once (& gaudi_spinlock_once_flag , ucs_gaudi_spinlock_once_init );
1743+ if (gaudi_spinlock_init_status != UCS_OK ) {
1744+ ucs_error ("Failed to initialize spinlock: %s" ,
1745+ ucs_status_string (gaudi_spinlock_init_status ));
1746+ return ;
1747+ }
1748+
17331749 pthread_mutex_lock (& gaudi_init_mutex );
17341750 if (!ucs_gaudi_topo_ctx .provider_added ) {
1735- /* Initialize spinlock first */
1736- status = ucs_spinlock_init (& ucs_gaudi_topo_ctx .lock , 0 );
1737- if (status != UCS_OK ) {
1738- pthread_mutex_unlock (& gaudi_init_mutex );
1739- ucs_error ("Failed to initialize spinlock: %s" ,
1740- ucs_status_string (status ));
1741- return ;
1742- }
1743-
17441751 ucs_debug ("Registering Gaudi topology provider" );
17451752 ucs_list_add_head (& ucs_sys_topo_providers_list ,
17461753 & ucs_gaudi_topo_provider .list );
@@ -1763,6 +1770,14 @@ static ucs_status_t ucs_gaudi_lazy_init()
17631770 return UCS_ERR_UNSUPPORTED ;
17641771 }
17651772
1773+ /* Ensure spinlock exists */
1774+ pthread_once (& gaudi_spinlock_once_flag , ucs_gaudi_spinlock_once_init );
1775+ if (gaudi_spinlock_init_status != UCS_OK ) {
1776+ ucs_error ("Failed to initialize spinlock: %s" ,
1777+ ucs_status_string (gaudi_spinlock_init_status ));
1778+ return gaudi_spinlock_init_status ;
1779+ }
1780+
17661781 ucs_spin_lock (& ucs_gaudi_topo_ctx .lock );
17671782
17681783 if (ucs_gaudi_topo_ctx .initialized ) {
@@ -1891,9 +1906,6 @@ void ucs_gaudi_topo_cleanup()
18911906 ucs_gaudi_topo_ctx .have_assignment = 0 ;
18921907
18931908 ucs_spin_unlock (& ucs_gaudi_topo_ctx .lock );
1894- ucs_spinlock_destroy (& ucs_gaudi_topo_ctx .lock );
1895-
18961909 pthread_mutex_unlock (& gaudi_init_mutex );
1897-
18981910 ucs_debug ("Gaudi topology cleaned up" );
18991911}
0 commit comments