Skip to content

Commit 86ee14a

Browse files
authored
[RISCV] Add "gauntlet" bot for rapidly testing a range of configs with llvm-test-suite (#428)
More so than the other RISC-V builders, this is optimised for getting some testing feedback as quickly as possible. It isn't intended to replace any of those, meaning we can rely on them still running for higher coverage and higher emulation fidelity. The idea of this bot is to help with the steps we typically end up taking if there's an unexpected bot failure: * Figure out if it's easily reproducible on a simple test (especially true if there was an issue in a stage2 native build). * Determine if it affects all or only some configurations. * Narrow down exactly which patch caused it (helped here by the more rapid cycle time of this bot). It does this by doing a simple stage1 build, then cross-compiling the llvm-test-suite in multiple configurations and running with qemu-user. It takes about 25 minutes to run (I masked a couple of 'long pole' tests that take a lot longer than others). If we had "gatekeeper" <https://discourse.llvm.org/t/rfc-introduce-gate-keeper-builders-to-reduce-notification-noise-from-long-running-bots/67931> bots this would be an ideal candidate - to be run as soon as a fast x86 bot determines that there's no failure from its perspective (meaning we can run to check for anything RISC-V specific). In the absence of support for that setup, we do it back to front - run the llvm-test-suite across the configurations of interest. If there were any failures, then do 'check-all' for the x86 clang compiler so that we can at least flag in the logs if it appears this is likely an issue that isn't RISC-V specific. A repeat and sleep is used for STEP_FAILURE emission. It doesn't work reliably without it (see zorg/buildbot/builders/sanitizers/buildbot_functions.sh which ran into the same problem). It's not clear what the problem is, but a delay alone is not enough (e.g. printing and sleeping for a minute); the combination of printing the failure string multiple times and waiting after each one seems necessary.
1 parent a9b3e2e commit 86ee14a

File tree

3 files changed

+181
-0
lines changed

3 files changed

+181
-0
lines changed

Diff for: buildbot/osuosl/master/config/builders.py

+13
Original file line numberDiff line numberDiff line change
@@ -3424,6 +3424,19 @@
34243424
script_interpreter=None,
34253425
clean=True)},
34263426

3427+
## Simple single-stage build of clang, then cross-building and running the
3428+
## llvm-test-suite under qemu-user for a number of configurations. If
3429+
## there is a failure, do a check-all of the native (x86_64) LLVM, to provide
3430+
## an indicator as to whether the problem is likely RISC-V specific or not.
3431+
{'name' : "clang-riscv-gauntlet",
3432+
'workernames' : ["rise-worker-1"],
3433+
'builddir':"clang-riscv-gauntlet",
3434+
'factory' : AnnotatedBuilder.getAnnotatedBuildFactory(
3435+
script="rise-riscv-gauntlet-build.sh",
3436+
checkout_llvm_sources=False,
3437+
script_interpreter=None,
3438+
clean=True)},
3439+
34273440
# Builders similar to used in Buildkite premerge pipeline.
34283441
# Please keep in sync with llvm-project/.ci configurations.
34293442

Diff for: buildbot/osuosl/master/config/workers.py

+1
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ def get_all():
398398
create_worker("rise-clang-riscv-rva23-2stage", properties={'jobs' : 32}, max_builds=1),
399399
create_worker("rise-clang-riscv-rva23-mrvv-vec-bits-2stage", properties={'jobs' : 16}, max_builds=1),
400400
create_worker("rise-clang-riscv-rva23-evl-vec-2stage", properties={'jobs' : 16}, max_builds=1),
401+
create_worker("rise-worker-1", properties={'jobs' : 32}, max_builds=1),
401402

402403
# FIXME: A placeholder for annoying worker which nobody could stop.
403404
# adding it avoid logs spammed by failed authentication for that worker.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
#!/bin/sh

# We don't want to build within 'build' (where we start by default). Bail out
# if the directory change fails, so that the 'rm -rf' below can never run
# relative to an unexpected directory.
cd .. || exit 1
rm -rf build

# Revision to check out: BUILDBOT_REVISION when it is set and non-empty,
# otherwise the tip of origin/main.
LLVM_REVISION="${BUILDBOT_REVISION:-origin/main}"
8+
9+
# Print all arguments, joined by single spaces, as one line on stderr and
# then terminate the script with exit status 1.
die() {
  { printf '%s\n' "$*"; } 1>&2
  exit 1
}
13+
14+
# Write a "@@@BUILD_STEP <name>@@@" annotation line to stderr, where <name>
# is all arguments joined by single spaces.
build_step() {
  { printf '@@@BUILD_STEP %s@@@\n' "$*"; } 1>&2
}
17+
# Write a "@@@STEP_TEXT@<text>@@@" annotation line to stderr, where <text>
# is all arguments joined by single spaces.
step_text() {
  { printf '@@@STEP_TEXT@%s@@@\n' "$*"; } 1>&2
}
20+
# Set to 1 by step_failure as soon as any step has been reported as failed.
HAD_FAILURE=0

# Record that a step failed and emit the "@@@STEP_FAILURE@@@" annotation on
# stderr. Takes no arguments.
step_failure() {
  HAD_FAILURE=1
  # Same workaround as the sanitizer builders: the server fails to pick up a
  # step failure unless the marker is repeated several times with a pause
  # after each one.
  for _ in 0 1 2; do
    printf '%s\n' '@@@STEP_FAILURE@@@' 1>&2
    sleep 5
  done
}
30+
31+
# Shell options for the setup phase:
#   -u  referencing an unset variable is an error
#   -x  trace all commands
set -ux
# -e: exit upon command failure. Will be disabled later.
set -e

# llvm-project: clone when no checkout exists yet, then fetch and hard-reset
# the checkout to the requested revision.
if ! [ -d llvm-project ]; then
  build_step "Cloning llvm-project repo"
  git clone --progress https://github.com/llvm/llvm-project.git
fi
build_step "Updating llvm-project repo"
git -C llvm-project fetch origin
git -C llvm-project reset --hard "${LLVM_REVISION}"

# llvm-test-suite: same procedure, but always pinned to origin/main.
if ! [ -d llvm-test-suite ]; then
  build_step "Cloning llvm-test-suite repo"
  git clone --progress https://github.com/llvm/llvm-test-suite.git
fi
build_step "Updating llvm-test-suite repo"
git -C llvm-test-suite fetch origin
git -C llvm-test-suite reset --hard origin/main

# The clean is unconditional (BUILDBOT_CLOBBER=1 is not consulted) because the
# script hasn't been tested without cleaning after each build.
build_step "Cleaning last build"
rm -rf llvm-project/build llvm-test-suite/build.* *-toolchain.cmake
58+
59+
# Stage 1: configure a Release+assertions build of llvm, clang and lld with
# the RISCV and X86 targets, using the clang/clang++ found on PATH, lld as the
# linker and ccache as the compiler launcher.
build_step "llvm-project configure stage 1"
cmake -G Ninja \
  -S llvm-project/llvm \
  -B llvm-project/build/stage1 \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_C_COMPILER=clang \
  -DCMAKE_CXX_COMPILER=clang++ \
  -DCMAKE_C_COMPILER_LAUNCHER=ccache \
  -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
  -DLLVM_ENABLE_ASSERTIONS=True \
  -DLLVM_ENABLE_LLD=True \
  -DLLVM_ENABLE_PROJECTS="lld;clang;llvm" \
  -DLLVM_TARGETS_TO_BUILD="RISCV;X86" \
  -DLLVM_LIT_ARGS="-v"

build_step "llvm-project build stage 1"
cmake --build llvm-project/build/stage1

# Absolute path of the directory holding the freshly built stage 1 binaries.
STAGE1_BINDIR="$(pwd)/llvm-project/build/stage1/bin"
79+
# Don't exit immediately upon failure from here on: each configuration is
# attempted, and failures are recorded through step_failure instead.
set +e

# Skip a few tests that have excessive runtimes relative to the others.
export LIT_FILTER_OUT='(SingleSource/Benchmarks/Polybench/linear-algebra/solvers/(ludcmp|lu)|MicroBenchmarks/LoopVectorization/LoopInterleavingBenchmarks)'

# QEMU CPU description shared by every rva23* configuration. It does not
# depend on the configuration, so it is defined once outside the loop.
RVA23_QEMU_CPU="rv64,zba=true,zbb=true,zbc=false,zbs=true,zfhmin=true,v=true,vext_spec=v1.0,zkt=true,zvfhmin=true,zvbb=true,zvkt=true,zihintntl=true,zicond=true,zimop=true,zcmop=true,zcb=true,zfa=true,zawrs=true,rvv_ta_all_1s=true,rvv_ma_all_1s=true,rvv_vl_half_avl=true"

for CONF in rva20 rva22 rva23 rva23-evl rva23-mrvv-vec-bits; do
  # Pick the compiler flags and the matching QEMU CPU description.
  case "$CONF" in
    rva20)
      CFLAGS="-march=rva20u64"
      QEMU_CPU="rv64,zfa=false,zba=false,zbb=false,zbc=false,zbs=false"
      ;;
    rva22)
      CFLAGS="-march=rva22u64"
      QEMU_CPU="rv64,zba=true,zbb=true,zbc=false,zbs=true,zfhmin=true,v=false,zkt=true,zihintntl=true"
      ;;
    rva23)
      CFLAGS="-march=rva23u64"
      QEMU_CPU=$RVA23_QEMU_CPU
      ;;
    rva23-evl)
      CFLAGS="-march=rva23u64 -mllvm -force-tail-folding-style=data-with-evl -mllvm -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue"
      QEMU_CPU=$RVA23_QEMU_CPU
      ;;
    rva23-mrvv-vec-bits)
      CFLAGS="-march=rva23u64 -mrvv-vector-bits=zvl"
      QEMU_CPU=$RVA23_QEMU_CPU
      ;;
    *)
      echo "Unrecognised config name"
      exit 1
      ;;
  esac

  # Exported so that the emulated test runs see them. The sysroot is expected
  # in 'rvsysroot', one level above the current working directory.
  export QEMU_LD_PREFIX="$(pwd)/../rvsysroot"
  export QEMU_CPU

  # Generate the CMake toolchain file for this configuration. The here-document
  # delimiter is unquoted on purpose: $(pwd), $CFLAGS and $STAGE1_BINDIR are
  # expanded by the shell while the file is written.
  cat > "$CONF-toolchain.cmake" <<EOF
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSROOT $(pwd)/../rvsysroot)
set(CMAKE_C_COMPILER_TARGET riscv64-linux-gnu)
set(CMAKE_CXX_COMPILER_TARGET riscv64-linux-gnu)
set(CMAKE_C_FLAGS_INIT "$CFLAGS -DSMALL_PROBLEM_SIZE")
set(CMAKE_CXX_FLAGS_INIT "$CFLAGS -DSMALL_PROBLEM_SIZE")
set(CMAKE_LINKER_TYPE LLD)
set(CMAKE_C_COMPILER $STAGE1_BINDIR/clang)
set(CMAKE_CXX_COMPILER $STAGE1_BINDIR/clang++)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
EOF

  # Configure, build and check. A failure at any stage is reported and the
  # remaining stages of this configuration are skipped.
  build_step "$CONF: llvm-test-suite configure"
  if ! cmake -G Ninja \
    --toolchain="$(pwd)/$CONF-toolchain.cmake" \
    -DCMAKE_BUILD_TYPE=Release \
    "-DTEST_SUITE_LIT=$STAGE1_BINDIR/llvm-lit" \
    -DTEST_SUITE_LIT_FLAGS=-v \
    -DTEST_SUITE_COLLECT_CODE_SIZE=OFF \
    -DTEST_SUITE_COLLECT_COMPILE_TIME=OFF \
    -DTEST_SUITE_USER_MODE_EMULATION=ON \
    -DSMALL_PROBLEM_SIZE=ON \
    -S llvm-test-suite \
    -B "llvm-test-suite/build.$CONF"; then
    step_failure
    continue
  fi

  build_step "$CONF: llvm-test-suite build"
  if ! cmake --build "llvm-test-suite/build.$CONF"; then
    step_failure
    continue
  fi

  build_step "$CONF: llvm-test-suite check"
  if ! cmake --build "llvm-test-suite/build.$CONF" --target check; then
    step_failure
    continue
  fi
done

# Remove the filter from the environment so that lit runs started after this
# point are not filtered. 'export -n' is a bash extension; under a strictly
# POSIX /bin/sh such as dash it is an illegal option to a special built-in,
# which aborts the script. 'unset' is portable.
unset LIT_FILTER_OUT
158+
159+
# Run the host check-all only when a test-suite step failed, to indicate
# whether the failure is likely RISC-V specific or not.
if [ "$HAD_FAILURE" -eq 0 ]; then
  build_step "SKIPPED llvm-project check-all"
else
  build_step "llvm-project check-all"
  cmake --build llvm-project/build/stage1 --target check-all ||
    die "check-all on X86_64 host failed. This indicates there is most likely an issue that is not RISC-V specific."
fi

0 commit comments

Comments
 (0)