Skip to content

Commit 3e75e41

Browse files
georges-arm authored and Frank Barchard committed
[AArch64] Add "limit" variable explanations in SVE *AR30 kernels
As requested here:
https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6023583/1/source/row_sve.cc#1973

Change-Id: I15d8ca1f724a7123fbf52ac60b18c850e4004e64
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6067153
Reviewed-by: Justin Green <[email protected]>
Reviewed-by: Frank Barchard <[email protected]>
1 parent 11ef227 commit 3e75e41

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

source/row_sve.cc

+9-1
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,10 @@ extern "C" {
301301
"z20", "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", \
302302
"z31", "p0", "p1", "p2", "p3"
303303

304-
// Store AR30 elements
304+
// Store AR30 elements. Inputs are 2.14 fixed point RGB. We expect z23 to be
305+
// populated with 0x3ff0 (0x3fff would also work) to saturate the R input
306+
// rather than needing a pair of shifts to saturate and then insert into the
307+
// correct position in the lane.
305308
#define STOREAR30_SVE \
306309
"uqshl z16.h, p0/m, z16.h, #2 \n" /* bbbbbbbbbbxxxxxx */ \
307310
"uqshl z17.h, p0/m, z17.h, #2 \n" /* ggggggggggxxxxxx */ \
@@ -2196,6 +2199,7 @@ void I210ToAR30Row_SVE2(const uint16_t* src_y,
21962199
uint64_t vl;
21972200
asm("cnth %0" : "=r"(vl));
21982201
int width_last_y = width & (vl - 1);
2202+
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
21992203
uint16_t limit = 0x3ff0;
22002204
asm volatile(
22012205
"ptrue p0.b \n" //
@@ -2301,6 +2305,7 @@ void P210ToAR30Row_SVE2(const uint16_t* src_y,
23012305
int width_last_uv = width_last_y + (width_last_y & 1);
23022306
uint32_t nv_uv_start = 0x03010301U;
23032307
uint32_t nv_uv_step = 0x04040404U;
2308+
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
23042309
uint16_t limit = 0x3ff0;
23052310
asm volatile(
23062311
"ptrue p0.b \n" //
@@ -2458,6 +2463,7 @@ void I410ToAR30Row_SVE2(const uint16_t* src_y,
24582463
uint64_t vl;
24592464
asm("cnth %0" : "=r"(vl));
24602465
int width_last_y = width & (vl - 1);
2466+
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
24612467
uint16_t limit = 0x3ff0;
24622468
asm volatile(
24632469
"ptrue p0.b \n" //
@@ -2555,6 +2561,7 @@ void P410ToAR30Row_SVE2(const uint16_t* src_y,
25552561
uint64_t vl;
25562562
asm("cnth %0" : "=r"(vl));
25572563
int width_last_y = width & (vl - 1);
2564+
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
25582565
uint16_t limit = 0x3ff0;
25592566
asm volatile(
25602567
"ptrue p0.b \n" //
@@ -2607,6 +2614,7 @@ void I212ToAR30Row_SVE2(const uint16_t* src_y,
26072614
uint64_t vl;
26082615
asm("cnth %0" : "=r"(vl));
26092616
int width_last_y = width & (vl - 1);
2617+
// The limit is used for saturating the 2.14 red channel in STOREAR30_SVE.
26102618
uint16_t limit = 0x3ff0;
26112619
asm volatile(
26122620
"ptrue p0.b \n" //

0 commit comments

Comments
 (0)