@@ -301,7 +301,10 @@ extern "C" {
301
301
" z20" , " z22" , " z23" , " z24" , " z25" , " z26" , " z27" , " z28" , " z29" , " z30" , \
302
302
" z31" , " p0" , " p1" , " p2" , " p3"
303
303
304
- // Store AR30 elements
304
+ // Store AR30 elements. Inputs are 2.14 fixed point RGB. We expect z23 to be
305
+ // populated with 0x3ff0 (0x3fff would also work) to saturate the R input
306
+ // rather than needing a pair of shifts to saturate and then insert into the
307
+ // correct position in the lane.
305
308
#define STOREAR30_SVE \
306
309
" uqshl z16.h, p0/m, z16.h, #2 \n " /* bbbbbbbbbbxxxxxx */ \
307
310
" uqshl z17.h, p0/m, z17.h, #2 \n " /* ggggggggggxxxxxx */ \
@@ -2196,6 +2199,7 @@ void I210ToAR30Row_SVE2(const uint16_t* src_y,
2196
2199
uint64_t vl;
2197
2200
asm (" cnth %0" : " =r" (vl));
2198
2201
int width_last_y = width & (vl - 1 );
2202
+ // The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
2199
2203
uint16_t limit = 0x3ff0 ;
2200
2204
asm volatile (
2201
2205
" ptrue p0.b \n " //
@@ -2301,6 +2305,7 @@ void P210ToAR30Row_SVE2(const uint16_t* src_y,
2301
2305
int width_last_uv = width_last_y + (width_last_y & 1 );
2302
2306
uint32_t nv_uv_start = 0x03010301U ;
2303
2307
uint32_t nv_uv_step = 0x04040404U ;
2308
+ // The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
2304
2309
uint16_t limit = 0x3ff0 ;
2305
2310
asm volatile (
2306
2311
" ptrue p0.b \n " //
@@ -2458,6 +2463,7 @@ void I410ToAR30Row_SVE2(const uint16_t* src_y,
2458
2463
uint64_t vl;
2459
2464
asm (" cnth %0" : " =r" (vl));
2460
2465
int width_last_y = width & (vl - 1 );
2466
+ // The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
2461
2467
uint16_t limit = 0x3ff0 ;
2462
2468
asm volatile (
2463
2469
" ptrue p0.b \n " //
@@ -2555,6 +2561,7 @@ void P410ToAR30Row_SVE2(const uint16_t* src_y,
2555
2561
uint64_t vl;
2556
2562
asm (" cnth %0" : " =r" (vl));
2557
2563
int width_last_y = width & (vl - 1 );
2564
+ // The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
2558
2565
uint16_t limit = 0x3ff0 ;
2559
2566
asm volatile (
2560
2567
" ptrue p0.b \n " //
@@ -2607,6 +2614,7 @@ void I212ToAR30Row_SVE2(const uint16_t* src_y,
2607
2614
uint64_t vl;
2608
2615
asm (" cnth %0" : " =r" (vl));
2609
2616
int width_last_y = width & (vl - 1 );
2617
+ // The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
2610
2618
uint16_t limit = 0x3ff0 ;
2611
2619
asm volatile (
2612
2620
" ptrue p0.b \n " //
0 commit comments