119
119
#include < immintrin.h>
120
120
#endif
121
121
122
+ #define RUAPU_IMPLEMENTATION
123
+ #include " ruapu.h"
124
+
122
125
// topology info
123
126
static int g_cpucount;
124
127
static int g_physical_cpucount;
@@ -185,7 +188,6 @@ static int g_cpu_is_arm_a53_a55;
185
188
#endif // __aarch64__
186
189
#endif // defined __ANDROID__ || defined __linux__
187
190
188
- static bool g_is_being_debugged = false ;
189
191
static bool is_being_debugged ()
190
192
{
191
193
#if defined _WIN32
@@ -240,186 +242,6 @@ static bool is_being_debugged()
240
242
#endif
241
243
}
242
244
243
- #if defined _WIN32
244
- #if WINAPI_FAMILY == WINAPI_FAMILY_APP
245
- static int detectisa (const void * /* some_inst*/ )
246
- {
247
- // uwp does not support seh :(
248
- return 0 ;
249
- }
250
- #else // WINAPI_FAMILY == WINAPI_FAMILY_APP
251
- static int g_sigill_caught = 0 ;
252
- static jmp_buf g_jmpbuf;
253
-
254
- static LONG CALLBACK catch_sigill (struct _EXCEPTION_POINTERS * ExceptionInfo)
255
- {
256
- if (ExceptionInfo->ExceptionRecord ->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION)
257
- {
258
- g_sigill_caught = 1 ;
259
- longjmp (g_jmpbuf, -1 );
260
- }
261
-
262
- return EXCEPTION_CONTINUE_SEARCH;
263
- }
264
-
265
- static int detectisa (const void * some_inst)
266
- {
267
- if (g_is_being_debugged)
268
- return 0 ;
269
-
270
- g_sigill_caught = 0 ;
271
-
272
- PVOID eh = AddVectoredExceptionHandler (1 , catch_sigill);
273
-
274
- if (setjmp (g_jmpbuf) == 0 )
275
- {
276
- ((void (*)())some_inst)();
277
- }
278
-
279
- RemoveVectoredExceptionHandler (eh);
280
-
281
- return g_sigill_caught ? 0 : 1 ;
282
- }
283
- #endif // WINAPI_FAMILY == WINAPI_FAMILY_APP
284
-
285
- #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
286
- #ifdef _MSC_VER
287
- #define DEFINE_INSTCODE (name, ...) __pragma(section(" .text" )) __declspec(allocate(" .text" )) static unsigned char name[] = {__VA_ARGS__, 0xc3 };
288
- #else
289
- #define DEFINE_INSTCODE (name, ...) __attribute__((section(" .text" ))) static unsigned char name[] = {__VA_ARGS__, 0xc3 };
290
- #endif
291
- #elif __aarch64__
292
- #ifdef _MSC_VER
293
- #define DEFINE_INSTCODE (name, ...) __pragma(section(" .text" )) __declspec(allocate(" .text" )) static unsigned int name[] = {__VA_ARGS__, 0xd65f03c0 };
294
- #else
295
- #define DEFINE_INSTCODE (name, ...) __attribute__((section(" .text" ))) static unsigned int name[] = {__VA_ARGS__, 0xd65f03c0 };
296
- #endif
297
- #elif __arm__
298
- #ifdef _MSC_VER
299
- #define DEFINE_INSTCODE (name, ...) __pragma(section(" .text" )) __declspec(allocate(" .text" )) static unsigned int name[] = {__VA_ARGS__, 0x4770bf00 };
300
- #else
301
- #define DEFINE_INSTCODE (name, ...) __attribute__((section(" .text" ))) static unsigned int name[] = {__VA_ARGS__, 0x4770bf00 };
302
- #endif
303
- #endif
304
-
305
- #elif defined __ANDROID__ || defined __linux__ || defined __APPLE__
306
- static int g_sigill_caught = 0 ;
307
- static sigjmp_buf g_jmpbuf;
308
-
309
- static void catch_sigill (int /* signo*/ , siginfo_t * /* si*/ , void * /* data*/ )
310
- {
311
- g_sigill_caught = 1 ;
312
- siglongjmp (g_jmpbuf, -1 );
313
- }
314
-
315
- static int detectisa (void (*some_inst)())
316
- {
317
- if (g_is_being_debugged)
318
- return 0 ;
319
-
320
- g_sigill_caught = 0 ;
321
-
322
- struct sigaction sa;
323
- struct sigaction old_sa;
324
- memset (&sa, 0 , sizeof (sa));
325
- sa.sa_sigaction = catch_sigill;
326
- sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
327
- sigaction (SIGILL, &sa, &old_sa);
328
-
329
- if (sigsetjmp (g_jmpbuf, 1 ) == 0 )
330
- {
331
- some_inst ();
332
- }
333
-
334
- sigaction (SIGILL, &old_sa, NULL );
335
-
336
- return g_sigill_caught ? 0 : 1 ;
337
- }
338
-
339
- #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
340
- #define DEFINE_INSTCODE (name, ...) \
341
- static void name () \
342
- { \
343
- asm volatile (" .byte " #__VA_ARGS__ \
344
- : \
345
- : \
346
- :); \
347
- };
348
- #elif __aarch64__
349
- #define DEFINE_INSTCODE (name, ...) \
350
- static void name () \
351
- { \
352
- asm volatile (" .word " #__VA_ARGS__ \
353
- : \
354
- : \
355
- :); \
356
- };
357
- #elif __arm__
358
- #define DEFINE_INSTCODE (name, ...) \
359
- static void name () \
360
- { \
361
- asm volatile (" .word " #__VA_ARGS__ \
362
- : \
363
- : \
364
- :); \
365
- };
366
- #endif
367
-
368
- #endif // defined _WIN32 || defined __ANDROID__ || defined __linux__ || defined __APPLE__
369
-
370
- #if defined _WIN32 || defined __ANDROID__ || defined __linux__ || defined __APPLE__
371
- #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
372
- DEFINE_INSTCODE (some_mmx, 0x0f , 0xdb , 0xc0 ) // pand mm0,mm0
373
- DEFINE_INSTCODE(some_sse, 0x0f , 0x54 , 0xc0 ) // andps xmm0,xmm0
374
- DEFINE_INSTCODE(some_sse2, 0x66 , 0x0f , 0xfe , 0xc0 ) // paddd xmm0,xmm0
375
- DEFINE_INSTCODE(some_sse3, 0xf2 , 0x0f , 0x7c , 0xc0 ) // haddps xmm0,xmm0
376
- DEFINE_INSTCODE(some_ssse3, 0x66 , 0x0f , 0x38 , 0x06 , 0xc0 ) // phsubd xmm0,xmm0
377
- DEFINE_INSTCODE(some_sse41, 0x66 , 0x0f , 0x38 , 0x3d , 0xc0 ) // pmaxsd xmm0,xmm0
378
- DEFINE_INSTCODE(some_sse42, 0x66 , 0x0f , 0x38 , 0x37 , 0xc0 ) // pcmpgtq xmm0,xmm0
379
- DEFINE_INSTCODE(some_sse4a, 0x66 , 0x0f , 0x79 , 0xc0 ) // extrq xmm0,xmm0
380
- DEFINE_INSTCODE(some_xop, 0x8f , 0xe8 , 0x78 , 0xb6 , 0xc0 , 0x00 ) // vpmadcswd %xmm0,%xmm0,%xmm0,%xmm0
381
- DEFINE_INSTCODE(some_avx, 0xc5 , 0xfc , 0x54 , 0xc0 ) // vandps ymm0,ymm0,ymm0
382
- DEFINE_INSTCODE(some_f16c, 0xc4 , 0xe2 , 0x7d , 0x13 , 0xc0 ) // vcvtph2ps ymm0,xmm0
383
- DEFINE_INSTCODE(some_fma, 0xc4 , 0xe2 , 0x7d , 0x98 , 0xc0 ) // vfmadd132ps ymm0,ymm0,ymm0
384
- DEFINE_INSTCODE(some_avx2, 0xc5 , 0xfd , 0xfe , 0xc0 ) // vpaddd ymm0,ymm0,ymm0
385
- DEFINE_INSTCODE(some_avx512f, 0x62 , 0xf1 , 0x7c , 0x48 , 0x58 , 0xc0 ) // vaddps zmm0,zmm0,zmm0
386
- DEFINE_INSTCODE(some_avx512bw, 0x62 , 0xf1 , 0x7d , 0x48 , 0xfd , 0xc0 ) // vpaddw zmm0,zmm0,zmm0
387
- DEFINE_INSTCODE(some_avx512cd, 0x62 , 0xf2 , 0xfd , 0x48 , 0x44 , 0xc0 ) // vplzcntq zmm0,zmm0
388
- DEFINE_INSTCODE(some_avx512dq, 0x62 , 0xf1 , 0x7c , 0x48 , 0x54 , 0xc0 ) // vandps zmm0,zmm0,zmm0
389
- DEFINE_INSTCODE(some_avx512vl, 0x62 , 0xf2 , 0xfd , 0x28 , 0x1f , 0xc0 ) // vpabsq ymm0,ymm0
390
- DEFINE_INSTCODE(some_avx512vnni, 0x62 , 0xf2 , 0x7d , 0x48 , 0x52 , 0xc0 ) // vpdpwssd %zmm0,%zmm0,%zmm0
391
- DEFINE_INSTCODE(some_avx512bf16, 0x62 , 0xf2 , 0x7e , 0x48 , 0x52 , 0xc0 ) // vdpbf16ps %zmm0,%zmm0,%zmm0
392
- DEFINE_INSTCODE(some_avx512ifma, 0x62 , 0xf2 , 0xfd , 0x48 , 0xb4 , 0xc0 ) // vpmadd52luq %zmm0,%zmm0,%zmm0
393
- DEFINE_INSTCODE(some_avx512vbmi, 0x62 , 0xf2 , 0x7d , 0x48 , 0x75 , 0xc0 ) // vpermi2b %zmm0,%zmm0,%zmm0
394
- DEFINE_INSTCODE(some_avx512vbmi2, 0x62 , 0xf2 , 0x7d , 0x48 , 0x71 , 0xc0 ) // vpshldvd %zmm0,%zmm0,%zmm0
395
- DEFINE_INSTCODE(some_avx512fp16, 0x62 , 0xf6 , 0x7d , 0x48 , 0x98 , 0xc0 ) // vfmadd132ph %zmm0,%zmm0,%zmm0
396
- DEFINE_INSTCODE(some_avxvnni, 0x62 , 0xf2 , 0x7d , 0x28 , 0x52 , 0xc0 ) // vpdpwssd ymm0,ymm0,ymm0
397
- DEFINE_INSTCODE(some_avxvnniint8, 0xc4 , 0xe2 , 0x7f , 0x50 , 0xc0 ) // vpdpbssd ymm0,ymm0,ymm0
398
- DEFINE_INSTCODE(some_avxifma, 0x62 , 0xf2 , 0xfd , 0x28 , 0xb4 , 0xc0 ) // vpmadd52luq %ymm0,%ymm0,%ymm0
399
-
400
- #elif __aarch64__
401
- DEFINE_INSTCODE (some_neon, 0x4e20d400 ) // fadd v0.4s,v0.4s,v0.4s
402
- DEFINE_INSTCODE(some_vfpv4, 0x0e216800 ) // fcvtn v0.4h,v0.4s
403
- DEFINE_INSTCODE(some_cpuid, 0xd5380000 ) // mrs x0,midr_el1
404
- DEFINE_INSTCODE(some_asimdhp, 0x0e401400 ) // fadd v0.4h,v0.4h,v0.4h
405
- DEFINE_INSTCODE(some_asimddp, 0x4e809400 ) // sdot v0.4h,v0.16b,v0.16b
406
- DEFINE_INSTCODE(some_asimdfhm, 0x4e20ec00 ) // fmlal v0.4s,v0.4h,v0.4h
407
- DEFINE_INSTCODE(some_bf16, 0x6e40ec00 ) // bfmmla v0.4h,v0.8h,v0.8h
408
- DEFINE_INSTCODE(some_i8mm, 0x4e80a400 ) // smmla v0.4h,v0.16b,v0.16b
409
- DEFINE_INSTCODE(some_sve, 0x65608000 ) // fmad z0.h,p0/m,z0.h,z0.h
410
- DEFINE_INSTCODE(some_sve2, 0x44405000 ) // smlslb z0.h,z0.b,z0.b
411
- DEFINE_INSTCODE(some_svebf16, 0x6460e400 ) // bfmmla z0.s,z0.h,z0.h
412
- DEFINE_INSTCODE(some_svei8mm, 0x45009800 ) // smmla z0.s,z0.b,z0.b
413
- DEFINE_INSTCODE(some_svef32mm, 0x64a0e400 ) // fmmla z0.s,z0.s,z0.s
414
-
415
- #elif __arm__
416
- DEFINE_INSTCODE (some_edsp, 0x0000fb20 ) // smlad r0,r0,r0,r0
417
- DEFINE_INSTCODE(some_neon, 0x0d40ef00 ) // vadd.f32 q0,q0,q0
418
- DEFINE_INSTCODE(some_vfpv4, 0x0600ffb6 ) // vcvt.f16.f32 d0,q0
419
-
420
- #endif
421
- #endif // defined _WIN32 || defined __ANDROID__ || defined __linux__ || defined __APPLE__
422
-
423
245
#if defined __ANDROID__ || defined __linux__
424
246
425
247
#define AT_HWCAP 16
@@ -765,7 +587,7 @@ static int get_cpu_support_x86_avx2()
765
587
static int get_cpu_support_x86_avx_vnni ()
766
588
{
767
589
#if __APPLE__
768
- return detectisa (some_avxvnni );
590
+ return ruapu_supports ( " avxvnni " );
769
591
#else
770
592
unsigned int cpu_info[4 ] = {0 };
771
593
x86_cpuid (0 , cpu_info);
@@ -791,7 +613,7 @@ static int get_cpu_support_x86_avx_vnni()
791
613
static int get_cpu_support_x86_avx512 ()
792
614
{
793
615
#if __APPLE__
794
- return detectisa (some_avx512f ) && detectisa (some_avx512bw ) && detectisa (some_avx512cd ) && detectisa (some_avx512dq ) && detectisa (some_avx512vl );
616
+ return ruapu_supports ( " avx512f " ) && ruapu_supports ( " avx512bw " ) && ruapu_supports ( " avx512cd " ) && ruapu_supports ( " avx512dq " ) && ruapu_supports ( " avx512vl " );
795
617
#else
796
618
unsigned int cpu_info[4 ] = {0 };
797
619
x86_cpuid (0 , cpu_info);
@@ -821,7 +643,7 @@ static int get_cpu_support_x86_avx512()
821
643
static int get_cpu_support_x86_avx512_vnni ()
822
644
{
823
645
#if __APPLE__
824
- return detectisa (some_avx512vnni );
646
+ return ruapu_supports ( " avx512vnni " );
825
647
#else
826
648
unsigned int cpu_info[4 ] = {0 };
827
649
x86_cpuid (0 , cpu_info);
@@ -851,7 +673,7 @@ static int get_cpu_support_x86_avx512_vnni()
851
673
static int get_cpu_support_x86_avx512_bf16 ()
852
674
{
853
675
#if __APPLE__
854
- return detectisa (some_avx512bf16 );
676
+ return ruapu_supports ( " avx512bf16 " );
855
677
#else
856
678
unsigned int cpu_info[4 ] = {0 };
857
679
x86_cpuid (0 , cpu_info);
@@ -877,7 +699,7 @@ static int get_cpu_support_x86_avx512_bf16()
877
699
static int get_cpu_support_x86_avx512_fp16 ()
878
700
{
879
701
#if __APPLE__
880
- return detectisa (some_avx512fp16 );
702
+ return ruapu_supports ( " avx512fp16 " );
881
703
#else
882
704
unsigned int cpu_info[4 ] = {0 };
883
705
x86_cpuid (0 , cpu_info);
@@ -2035,25 +1857,30 @@ static void initialize_global_cpu_info()
2035
1857
g_powersave = 0 ;
2036
1858
initialize_cpu_thread_affinity_mask (g_cpu_affinity_mask_all, g_cpu_affinity_mask_little, g_cpu_affinity_mask_big);
2037
1859
2038
- g_is_being_debugged = is_being_debugged ();
1860
+ #if (defined _WIN32 && (__aarch64__ || __arm__)) || __APPLE__
1861
+ if (!is_being_debugged ())
1862
+ {
1863
+ ruapu_init ();
1864
+ }
1865
+ #endif
2039
1866
2040
1867
#if defined _WIN32
2041
1868
#if __aarch64__
2042
- g_cpu_support_arm_cpuid = detectisa (some_cpuid );
2043
- g_cpu_support_arm_asimdhp = detectisa (some_asimdhp ) || IsProcessorFeaturePresent (43 ); // dp implies hp
2044
- g_cpu_support_arm_asimddp = detectisa (some_asimddp ) || IsProcessorFeaturePresent (43 ); // 43 is PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
2045
- g_cpu_support_arm_asimdfhm = detectisa (some_asimdfhm );
2046
- g_cpu_support_arm_bf16 = detectisa (some_bf16 );
2047
- g_cpu_support_arm_i8mm = detectisa (some_i8mm );
2048
- g_cpu_support_arm_sve = detectisa (some_sve );
2049
- g_cpu_support_arm_sve2 = detectisa (some_sve2 );
2050
- g_cpu_support_arm_svebf16 = detectisa (some_svebf16 );
2051
- g_cpu_support_arm_svei8mm = detectisa (some_svei8mm );
2052
- g_cpu_support_arm_svef32mm = detectisa (some_svef32mm );
1869
+ g_cpu_support_arm_cpuid = ruapu_supports ( " cpuid " );
1870
+ g_cpu_support_arm_asimdhp = ruapu_supports ( " asimdhp " ) || IsProcessorFeaturePresent (43 ); // dp implies hp
1871
+ g_cpu_support_arm_asimddp = ruapu_supports ( " asimddp " ) || IsProcessorFeaturePresent (43 ); // 43 is PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
1872
+ g_cpu_support_arm_asimdfhm = ruapu_supports ( " asimdfhm " );
1873
+ g_cpu_support_arm_bf16 = ruapu_supports ( " bf16 " );
1874
+ g_cpu_support_arm_i8mm = ruapu_supports ( " i8mm " );
1875
+ g_cpu_support_arm_sve = ruapu_supports ( " sve " );
1876
+ g_cpu_support_arm_sve2 = ruapu_supports ( " sve2 " );
1877
+ g_cpu_support_arm_svebf16 = ruapu_supports ( " svebf16 " );
1878
+ g_cpu_support_arm_svei8mm = ruapu_supports ( " svei8mm " );
1879
+ g_cpu_support_arm_svef32mm = ruapu_supports ( " svef32mm " );
2053
1880
#elif __arm__
2054
- g_cpu_support_arm_edsp = detectisa (some_edsp );
1881
+ g_cpu_support_arm_edsp = ruapu_supports ( " edsp " );
2055
1882
g_cpu_support_arm_neon = 1 ; // all modern windows arm devices have neon
2056
- g_cpu_support_arm_vfpv4 = detectisa (some_vfpv4 );
1883
+ g_cpu_support_arm_vfpv4 = ruapu_supports ( " vfpv4 " );
2057
1884
#endif // __aarch64__ || __arm__
2058
1885
#elif defined __ANDROID__ || defined __linux__
2059
1886
g_hwcaps = get_elf_hwcap (AT_HWCAP);
0 commit comments