forked from ClickHouse/ClickBench
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hardware.sh
executable file
·247 lines (210 loc) · 7.7 KB
/
hardware.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#!/bin/bash -e
QUERIES_FILE="queries.sql"
TRIES=3
mkdir -p clickhouse-benchmark
pushd clickhouse-benchmark
# Download the binary
if [[ ! -x clickhouse ]]; then
curl https://clickhouse.com/ | sh
fi
if [[ ! -f $QUERIES_FILE ]]; then
wget "https://raw.githubusercontent.com/ClickHouse/ClickBench/main/hardware/$QUERIES_FILE"
fi
uptime
echo "Starting clickhouse-server"
./clickhouse server >/dev/null 2>&1 &
PID=$!
function finish {
kill $PID
wait
}
trap finish EXIT
echo "Waiting for clickhouse-server to start"
for i in {1..30}; do
sleep 1
./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null && break || echo -n '.'
if [[ $i == 30 ]]; then exit 1; fi
done
if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse client --query "SELECT count() FROM hits") == '100000000' ]]; then
echo "Dataset already downloaded"
else
echo "Will download the dataset"
if [ "`uname`" = "Darwin" ]
then
./clickhouse client --receive_timeout 1000 --max_insert_threads $(sysctl -n hw.ncpu) --progress --query "
CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
else
./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query "
CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"
fi
./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits"
fi
if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active") == '1' ]]; then
echo "Dataset already prepared"
else
echo "Will prepare the dataset"
./clickhouse client --receive_timeout 1000 --query "OPTIMIZE TABLE hits FINAL"
fi
echo
echo "Will perform benchmark. Results:"
echo
>result.csv
QUERY_NUM=1
cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do
sync
if [ "`uname`" = "Darwin" ]
then
sudo purge > /dev/null
else
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
fi
echo -n "["
for i in $(seq 1 $TRIES); do
RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:)
[[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null"
[[ "$i" != $TRIES ]] && echo -n ", "
echo "${QUERY_NUM},${i},${RES}" >> result.csv
done
echo "],"
QUERY_NUM=$((QUERY_NUM + 1))
done
echo
echo "Benchmark complete. System info:"
echo
touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt
if [ "`uname`" = "Darwin" ]
then
echo '----Version, build id-----------'
./clickhouse local --query "SELECT format('Version: {}', version())"
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
echo '----CPU-------------------------'
sysctl hw.model | tee cpu_model.txt
sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' | tee cpu.txt
echo '----Disk Free and Total--------'
df -h . | tee df.txt
echo '----Memory Free and Total-------'
vm_stat | tee memory.txt
echo '----Physical Memory Amount------'
ls -l /var/vm | tee memory_total.txt
echo '--------------------------------'
else
echo '----Version, build id-----------'
./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())"
./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
echo '----CPU-------------------------'
cat /proc/cpuinfo | grep -i -F 'model name' | uniq | tee cpu_model.txt
lscpu | tee cpu.txt
echo '----Block Devices---------------'
lsblk | tee blk.txt
echo '----Disk Free and Total--------'
df -h . | tee df.txt
echo '----Memory Free and Total-------'
free -h | tee memory.txt
echo '----Physical Memory Amount------'
cat /proc/meminfo | grep MemTotal | tee memory_total.txt
echo '----RAID Info-------------------'
cat /proc/mdstat| tee mdstat.txt
echo '--------------------------------'
fi
echo
echo "Instance type from IMDS (if available):"
curl -s --connect-timeout 1 'http://169.254.169.254/latest/meta-data/instance-type' | tee instance.txt
echo
echo "Uploading the results (if possible)"
UUID=$(./clickhouse local --query "SELECT generateUUIDv4()")
./clickhouse local --query "
SELECT
'${UUID}' AS run_id,
version() AS version,
now() AS test_time,
(SELECT value FROM system.settings WHERE name = 'max_threads') AS threads,
filesystemCapacity() AS fs_capacity,
filesystemAvailable() AS fs_available,
file('cpu_model.txt') AS cpu_model,
file('cpu.txt') AS cpu,
file('df.txt') AS df,
file('memory.txt') AS memory,
file('memory_total.txt') AS memory_total,
file('blk.txt') AS blk,
file('mdstat.txt') AS mdstat,
file('instance.txt') AS instance
" | tee meta.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
INSERT INTO benchmark_runs
(run_id, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance)
FORMAT TSV" || echo "Cannot upload results."
./clickhouse local --query "
SELECT
'${UUID}' AS run_id,
c1 AS query_num,
c2 AS try_num,
c3 AS time
FROM file('result.csv')
" | tee results.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
INSERT INTO benchmark_results
(run_id, query_num, try_num, time)
FORMAT TSV" || echo "Cannot upload results. Please send the output to [email protected]"
<<////
Server Setup:
CREATE TABLE benchmark_runs
(
run_id UUID,
version String,
test_time DateTime,
threads String,
fs_capacity UInt64,
fs_available UInt64,
cpu_model String,
cpu String,
df String,
memory String,
memory_total String,
blk String,
mdstat String,
instance String
) ENGINE = ReplicatedMergeTree ORDER BY run_id;
CREATE TABLE benchmark_results
(
run_id UUID,
query_num UInt8,
try_num UInt8,
time Decimal32(3)
) ENGINE = ReplicatedMergeTree ORDER BY (run_id, query_num, try_num);
CREATE USER benchmark IDENTIFIED WITH no_password SETTINGS max_rows_to_read = 1, max_result_rows = 1, max_execution_time = 1;
CREATE QUOTA benchmark
KEYED BY ip_address
FOR RANDOMIZED INTERVAL 1 MINUTE MAX query_inserts = 4, written_bytes = 100000,
FOR RANDOMIZED INTERVAL 1 HOUR MAX query_inserts = 10, written_bytes = 500000,
FOR RANDOMIZED INTERVAL 1 DAY MAX query_inserts = 50, written_bytes = 2000000
TO benchmark;
GRANT INSERT ON benchmark_runs TO benchmark;
GRANT INSERT ON benchmark_results TO benchmark;
Example query:
SELECT
cpu_model,
threads,
instance,
k
FROM
(
SELECT
run_id,
exp(avg(log(adjusted_time / best_time))) AS k
FROM
(
WITH greatest(time, 0.01) AS adjusted_time
SELECT
run_id,
adjusted_time,
min(adjusted_time) OVER (PARTITION BY query_num, try_num) AS best_time
FROM benchmark_results
WHERE try_num > 1
)
GROUP BY run_id
ORDER BY k ASC
) AS t
INNER JOIN benchmark_runs USING (run_id)
////