Skip to content

Commit 226c286

Browse files
jan-wassenberg and copybara-github
authored and committed
split minmax_test due to OOM
PiperOrigin-RevId: 736767709
1 parent 5c80e6f commit 226c286

File tree

4 files changed

+195
-149
lines changed

4 files changed

+195
-149
lines changed

BUILD

+1
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@ HWY_TESTS = [
554554
("hwy/tests/", "masked_minmax_test"),
555555
("hwy/tests/", "memory_test"),
556556
("hwy/tests/", "minmax_test"),
557+
("hwy/tests/", "minmax128_test"),
557558
("hwy/tests/", "mul_by_pow2_test"),
558559
("hwy/tests/", "mul_pairwise_test"),
559560
("hwy/tests/", "mul_test"),

CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,7 @@ set(HWY_TEST_FILES
812812
hwy/tests/masked_minmax_test.cc
813813
hwy/tests/memory_test.cc
814814
hwy/tests/minmax_test.cc
815+
hwy/tests/minmax128_test.cc
815816
hwy/tests/mul_by_pow2_test.cc
816817
hwy/tests/mul_pairwise_test.cc
817818
hwy/tests/mul_test.cc

hwy/tests/minmax128_test.cc

+193
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
// Copyright 2019 Google LLC
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
#include <stddef.h>
17+
#include <stdint.h>
18+
19+
#undef HWY_TARGET_INCLUDE
20+
#define HWY_TARGET_INCLUDE "tests/minmax128_test.cc"
21+
#include "hwy/foreach_target.h" // IWYU pragma: keep
22+
#include "hwy/highway.h"
23+
#include "hwy/tests/test_util-inl.h"
24+
25+
HWY_BEFORE_NAMESPACE();
26+
namespace hwy {
27+
namespace HWY_NAMESPACE {
28+
namespace {
29+
30+
template <class D>
31+
static HWY_NOINLINE Vec<D> Make128(D d, uint64_t hi, uint64_t lo) {
32+
alignas(16) uint64_t in[2];
33+
in[0] = lo;
34+
in[1] = hi;
35+
return LoadDup128(d, in);
36+
}
37+
38+
struct TestMinMax128 {
39+
template <typename T, class D>
40+
HWY_NOINLINE void operator()(T /*unused*/, D d) {
41+
using V = Vec<D>;
42+
const size_t N = Lanes(d);
43+
auto a_lanes = AllocateAligned<T>(N);
44+
auto b_lanes = AllocateAligned<T>(N);
45+
auto min_lanes = AllocateAligned<T>(N);
46+
auto max_lanes = AllocateAligned<T>(N);
47+
HWY_ASSERT(a_lanes && b_lanes && min_lanes && max_lanes);
48+
RandomState rng;
49+
50+
const V v00 = Zero(d);
51+
const V v01 = Make128(d, 0, 1);
52+
const V v10 = Make128(d, 1, 0);
53+
const V v11 = Add(v01, v10);
54+
55+
// Same arg
56+
HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v00, v00));
57+
HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v01, v01));
58+
HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v10, v10));
59+
HWY_ASSERT_VEC_EQ(d, v11, Min128(d, v11, v11));
60+
HWY_ASSERT_VEC_EQ(d, v00, Max128(d, v00, v00));
61+
HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v01, v01));
62+
HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v10, v10));
63+
HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v11, v11));
64+
65+
// First arg less
66+
HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v00, v01));
67+
HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v01, v10));
68+
HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v10, v11));
69+
HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v00, v01));
70+
HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v01, v10));
71+
HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v10, v11));
72+
73+
// Second arg less
74+
HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v01, v00));
75+
HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v10, v01));
76+
HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v11, v10));
77+
HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v01, v00));
78+
HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v10, v01));
79+
HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v11, v10));
80+
81+
// Also check 128-bit blocks are independent
82+
for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) {
83+
for (size_t i = 0; i < N; ++i) {
84+
a_lanes[i] = Random64(&rng);
85+
b_lanes[i] = Random64(&rng);
86+
}
87+
const V a = Load(d, a_lanes.get());
88+
const V b = Load(d, b_lanes.get());
89+
for (size_t i = 0; i < N; i += 2) {
90+
const bool lt = a_lanes[i + 1] == b_lanes[i + 1]
91+
? (a_lanes[i] < b_lanes[i])
92+
: (a_lanes[i + 1] < b_lanes[i + 1]);
93+
min_lanes[i + 0] = lt ? a_lanes[i + 0] : b_lanes[i + 0];
94+
min_lanes[i + 1] = lt ? a_lanes[i + 1] : b_lanes[i + 1];
95+
max_lanes[i + 0] = lt ? b_lanes[i + 0] : a_lanes[i + 0];
96+
max_lanes[i + 1] = lt ? b_lanes[i + 1] : a_lanes[i + 1];
97+
}
98+
HWY_ASSERT_VEC_EQ(d, min_lanes.get(), Min128(d, a, b));
99+
HWY_ASSERT_VEC_EQ(d, max_lanes.get(), Max128(d, a, b));
100+
}
101+
}
102+
};
103+
104+
// Entry point: invokes TestMinMax128 through ForGEVectors<128, ...> with a
// value-initialized uint64_t selecting the lane type.
HWY_NOINLINE void TestAllMinMax128() {
  using ForEachVector = ForGEVectors<128, TestMinMax128>;
  ForEachVector()(uint64_t{0});
}
107+
108+
struct TestMinMax128Upper {
109+
template <typename T, class D>
110+
HWY_NOINLINE void operator()(T /*unused*/, D d) {
111+
using V = Vec<D>;
112+
const size_t N = Lanes(d);
113+
auto a_lanes = AllocateAligned<T>(N);
114+
auto b_lanes = AllocateAligned<T>(N);
115+
auto min_lanes = AllocateAligned<T>(N);
116+
auto max_lanes = AllocateAligned<T>(N);
117+
RandomState rng;
118+
119+
const V v00 = Zero(d);
120+
const V v01 = Make128(d, 0, 1);
121+
const V v10 = Make128(d, 1, 0);
122+
const V v11 = Add(v01, v10);
123+
124+
// Same arg
125+
HWY_ASSERT_VEC_EQ(d, v00, Min128Upper(d, v00, v00));
126+
HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v01, v01));
127+
HWY_ASSERT_VEC_EQ(d, v10, Min128Upper(d, v10, v10));
128+
HWY_ASSERT_VEC_EQ(d, v11, Min128Upper(d, v11, v11));
129+
HWY_ASSERT_VEC_EQ(d, v00, Max128Upper(d, v00, v00));
130+
HWY_ASSERT_VEC_EQ(d, v01, Max128Upper(d, v01, v01));
131+
HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v10, v10));
132+
HWY_ASSERT_VEC_EQ(d, v11, Max128Upper(d, v11, v11));
133+
134+
// Equivalent but not equal (chooses second arg)
135+
HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v00, v01));
136+
HWY_ASSERT_VEC_EQ(d, v11, Min128Upper(d, v10, v11));
137+
HWY_ASSERT_VEC_EQ(d, v00, Min128Upper(d, v01, v00));
138+
HWY_ASSERT_VEC_EQ(d, v10, Min128Upper(d, v11, v10));
139+
HWY_ASSERT_VEC_EQ(d, v00, Max128Upper(d, v01, v00));
140+
HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v11, v10));
141+
HWY_ASSERT_VEC_EQ(d, v01, Max128Upper(d, v00, v01));
142+
HWY_ASSERT_VEC_EQ(d, v11, Max128Upper(d, v10, v11));
143+
144+
// First arg less
145+
HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v01, v10));
146+
HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v01, v10));
147+
148+
// Second arg less
149+
HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v10, v01));
150+
HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v10, v01));
151+
152+
// Also check 128-bit blocks are independent
153+
for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) {
154+
for (size_t i = 0; i < N; ++i) {
155+
a_lanes[i] = Random64(&rng);
156+
b_lanes[i] = Random64(&rng);
157+
}
158+
const V a = Load(d, a_lanes.get());
159+
const V b = Load(d, b_lanes.get());
160+
for (size_t i = 0; i < N; i += 2) {
161+
const bool lt = a_lanes[i + 1] < b_lanes[i + 1];
162+
min_lanes[i + 0] = lt ? a_lanes[i + 0] : b_lanes[i + 0];
163+
min_lanes[i + 1] = lt ? a_lanes[i + 1] : b_lanes[i + 1];
164+
max_lanes[i + 0] = lt ? b_lanes[i + 0] : a_lanes[i + 0];
165+
max_lanes[i + 1] = lt ? b_lanes[i + 1] : a_lanes[i + 1];
166+
}
167+
HWY_ASSERT_VEC_EQ(d, min_lanes.get(), Min128Upper(d, a, b));
168+
HWY_ASSERT_VEC_EQ(d, max_lanes.get(), Max128Upper(d, a, b));
169+
}
170+
}
171+
};
172+
173+
// Entry point: invokes TestMinMax128Upper through ForGEVectors<128, ...> with
// a value-initialized uint64_t selecting the lane type.
HWY_NOINLINE void TestAllMinMax128Upper() {
  using ForEachVector = ForGEVectors<128, TestMinMax128Upper>;
  ForEachVector()(uint64_t{0});
}
176+
177+
} // namespace
178+
// NOLINTNEXTLINE(google-readability-namespace-comments)
179+
} // namespace HWY_NAMESPACE
180+
} // namespace hwy
181+
HWY_AFTER_NAMESPACE();
182+
183+
// Test registration section, compiled only when HWY_ONCE is nonzero.
// NOTE(review): presumably HWY_ONCE is set on a single pass of the
// foreach_target.h re-inclusion so this is emitted once - confirm.
#if HWY_ONCE
184+
namespace hwy {
185+
namespace {
186+
// Opens the HwyMinMax128Test suite; the two lines that follow attach the
// TestAll* entry points defined in this file to it.
HWY_BEFORE_TEST(HwyMinMax128Test);
187+
HWY_EXPORT_AND_TEST_P(HwyMinMax128Test, TestAllMinMax128);
188+
HWY_EXPORT_AND_TEST_P(HwyMinMax128Test, TestAllMinMax128Upper);
189+
HWY_AFTER_TEST();
190+
} // namespace
191+
} // namespace hwy
192+
// NOTE(review): presumably expands to the test binary's main() - confirm.
HWY_TEST_MAIN();
193+
#endif // HWY_ONCE

hwy/tests/minmax_test.cc

-149
Original file line numberDiff line numberDiff line change
@@ -114,153 +114,6 @@ HWY_NOINLINE void TestAllMinMax() {
114114
ForFloatTypes(ForPartialVectors<TestFloatMinMax>());
115115
}
116116

117-
template <class D>
118-
static HWY_NOINLINE Vec<D> Make128(D d, uint64_t hi, uint64_t lo) {
119-
alignas(16) uint64_t in[2];
120-
in[0] = lo;
121-
in[1] = hi;
122-
return LoadDup128(d, in);
123-
}
124-
125-
struct TestMinMax128 {
126-
template <typename T, class D>
127-
HWY_NOINLINE void operator()(T /*unused*/, D d) {
128-
using V = Vec<D>;
129-
const size_t N = Lanes(d);
130-
auto a_lanes = AllocateAligned<T>(N);
131-
auto b_lanes = AllocateAligned<T>(N);
132-
auto min_lanes = AllocateAligned<T>(N);
133-
auto max_lanes = AllocateAligned<T>(N);
134-
HWY_ASSERT(a_lanes && b_lanes && min_lanes && max_lanes);
135-
RandomState rng;
136-
137-
const V v00 = Zero(d);
138-
const V v01 = Make128(d, 0, 1);
139-
const V v10 = Make128(d, 1, 0);
140-
const V v11 = Add(v01, v10);
141-
142-
// Same arg
143-
HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v00, v00));
144-
HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v01, v01));
145-
HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v10, v10));
146-
HWY_ASSERT_VEC_EQ(d, v11, Min128(d, v11, v11));
147-
HWY_ASSERT_VEC_EQ(d, v00, Max128(d, v00, v00));
148-
HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v01, v01));
149-
HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v10, v10));
150-
HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v11, v11));
151-
152-
// First arg less
153-
HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v00, v01));
154-
HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v01, v10));
155-
HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v10, v11));
156-
HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v00, v01));
157-
HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v01, v10));
158-
HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v10, v11));
159-
160-
// Second arg less
161-
HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v01, v00));
162-
HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v10, v01));
163-
HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v11, v10));
164-
HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v01, v00));
165-
HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v10, v01));
166-
HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v11, v10));
167-
168-
// Also check 128-bit blocks are independent
169-
for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) {
170-
for (size_t i = 0; i < N; ++i) {
171-
a_lanes[i] = Random64(&rng);
172-
b_lanes[i] = Random64(&rng);
173-
}
174-
const V a = Load(d, a_lanes.get());
175-
const V b = Load(d, b_lanes.get());
176-
for (size_t i = 0; i < N; i += 2) {
177-
const bool lt = a_lanes[i + 1] == b_lanes[i + 1]
178-
? (a_lanes[i] < b_lanes[i])
179-
: (a_lanes[i + 1] < b_lanes[i + 1]);
180-
min_lanes[i + 0] = lt ? a_lanes[i + 0] : b_lanes[i + 0];
181-
min_lanes[i + 1] = lt ? a_lanes[i + 1] : b_lanes[i + 1];
182-
max_lanes[i + 0] = lt ? b_lanes[i + 0] : a_lanes[i + 0];
183-
max_lanes[i + 1] = lt ? b_lanes[i + 1] : a_lanes[i + 1];
184-
}
185-
HWY_ASSERT_VEC_EQ(d, min_lanes.get(), Min128(d, a, b));
186-
HWY_ASSERT_VEC_EQ(d, max_lanes.get(), Max128(d, a, b));
187-
}
188-
}
189-
};
190-
191-
// Entry point: invokes TestMinMax128 through ForGEVectors<128, ...> with a
// value-initialized uint64_t selecting the lane type.
HWY_NOINLINE void TestAllMinMax128() {
  using ForEachVector = ForGEVectors<128, TestMinMax128>;
  ForEachVector()(uint64_t{0});
}
194-
195-
struct TestMinMax128Upper {
196-
template <typename T, class D>
197-
HWY_NOINLINE void operator()(T /*unused*/, D d) {
198-
using V = Vec<D>;
199-
const size_t N = Lanes(d);
200-
auto a_lanes = AllocateAligned<T>(N);
201-
auto b_lanes = AllocateAligned<T>(N);
202-
auto min_lanes = AllocateAligned<T>(N);
203-
auto max_lanes = AllocateAligned<T>(N);
204-
RandomState rng;
205-
206-
const V v00 = Zero(d);
207-
const V v01 = Make128(d, 0, 1);
208-
const V v10 = Make128(d, 1, 0);
209-
const V v11 = Add(v01, v10);
210-
211-
// Same arg
212-
HWY_ASSERT_VEC_EQ(d, v00, Min128Upper(d, v00, v00));
213-
HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v01, v01));
214-
HWY_ASSERT_VEC_EQ(d, v10, Min128Upper(d, v10, v10));
215-
HWY_ASSERT_VEC_EQ(d, v11, Min128Upper(d, v11, v11));
216-
HWY_ASSERT_VEC_EQ(d, v00, Max128Upper(d, v00, v00));
217-
HWY_ASSERT_VEC_EQ(d, v01, Max128Upper(d, v01, v01));
218-
HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v10, v10));
219-
HWY_ASSERT_VEC_EQ(d, v11, Max128Upper(d, v11, v11));
220-
221-
// Equivalent but not equal (chooses second arg)
222-
HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v00, v01));
223-
HWY_ASSERT_VEC_EQ(d, v11, Min128Upper(d, v10, v11));
224-
HWY_ASSERT_VEC_EQ(d, v00, Min128Upper(d, v01, v00));
225-
HWY_ASSERT_VEC_EQ(d, v10, Min128Upper(d, v11, v10));
226-
HWY_ASSERT_VEC_EQ(d, v00, Max128Upper(d, v01, v00));
227-
HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v11, v10));
228-
HWY_ASSERT_VEC_EQ(d, v01, Max128Upper(d, v00, v01));
229-
HWY_ASSERT_VEC_EQ(d, v11, Max128Upper(d, v10, v11));
230-
231-
// First arg less
232-
HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v01, v10));
233-
HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v01, v10));
234-
235-
// Second arg less
236-
HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v10, v01));
237-
HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v10, v01));
238-
239-
// Also check 128-bit blocks are independent
240-
for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) {
241-
for (size_t i = 0; i < N; ++i) {
242-
a_lanes[i] = Random64(&rng);
243-
b_lanes[i] = Random64(&rng);
244-
}
245-
const V a = Load(d, a_lanes.get());
246-
const V b = Load(d, b_lanes.get());
247-
for (size_t i = 0; i < N; i += 2) {
248-
const bool lt = a_lanes[i + 1] < b_lanes[i + 1];
249-
min_lanes[i + 0] = lt ? a_lanes[i + 0] : b_lanes[i + 0];
250-
min_lanes[i + 1] = lt ? a_lanes[i + 1] : b_lanes[i + 1];
251-
max_lanes[i + 0] = lt ? b_lanes[i + 0] : a_lanes[i + 0];
252-
max_lanes[i + 1] = lt ? b_lanes[i + 1] : a_lanes[i + 1];
253-
}
254-
HWY_ASSERT_VEC_EQ(d, min_lanes.get(), Min128Upper(d, a, b));
255-
HWY_ASSERT_VEC_EQ(d, max_lanes.get(), Max128Upper(d, a, b));
256-
}
257-
}
258-
};
259-
260-
// Entry point: invokes TestMinMax128Upper through ForGEVectors<128, ...> with
// a value-initialized uint64_t selecting the lane type.
HWY_NOINLINE void TestAllMinMax128Upper() {
  using ForEachVector = ForGEVectors<128, TestMinMax128Upper>;
  ForEachVector()(uint64_t{0});
}
263-
264117
struct TestMinMaxMagnitude {
265118
template <class T>
266119
static constexpr MakeSigned<T> MaxPosIotaVal(hwy::FloatTag /*type_tag*/) {
@@ -372,8 +225,6 @@ namespace hwy {
372225
namespace {
373226
HWY_BEFORE_TEST(HwyMinMaxTest);
374227
HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax);
375-
HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax128);
376-
HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMax128Upper);
377228
HWY_EXPORT_AND_TEST_P(HwyMinMaxTest, TestAllMinMaxMagnitude);
378229
HWY_AFTER_TEST();
379230
} // namespace

0 commit comments

Comments
 (0)