forked from delta-io/delta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run-tests.py
executable file
·237 lines (198 loc) · 8.87 KB
/
run-tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#!/usr/bin/env python3
#
# Copyright (2021) The Delta Lake Project Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import subprocess
import shlex
from os import path
import argparse
# Define groups of subprojects that can be tested separately from other groups.
# As of now, we have only defined project groups in the SBT build, so these must match
# the group names defined in build.sbt. Selected at runtime via the --group CLI flag.
valid_project_groups = ["spark", "kernel"]
def get_args():
    """Parse this script's command-line flags and return the parsed namespace."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--group",
        required=False,
        default=None,
        choices=valid_project_groups,
        help="Run tests on a group of SBT projects")
    arg_parser.add_argument(
        "--coverage",
        required=False,
        default=False,
        action="store_true",
        help="Enables test coverage and generates an aggregate report for all subprojects")
    return arg_parser.parse_args()
def run_sbt_tests(root_dir, test_group, coverage, scala_version=None):
    """Run the SBT test suites.

    :param root_dir: repository root containing build/sbt.
    :param test_group: one of the valid project groups, or None to test everything.
    :param coverage: if True, enable scoverage and aggregate a coverage report.
    :param scala_version: specific Scala version to test with; None means
        cross-build against all configured Scala versions (sbt "+" prefix).
    """
    print("##### Running SBT tests #####")
    # unidoc (and the python tests driven elsewhere) only apply to the spark group.
    is_running_spark_tests = test_group is None or test_group == "spark"
    sbt_path = path.join(root_dir, path.join("build", "sbt"))
    cmd = [sbt_path, "clean"]
    test_cmd = "test"
    if test_group:
        # if test group is specified, then run tests only on that test group
        test_cmd = "{}Group/test".format(test_group)
    if coverage:
        cmd += ["coverage"]
    if scala_version is None:
        # when no scala version is specified, run test with all scala versions
        cmd += ["+ %s" % test_cmd]  # build/sbt ... "+ project/test" ...
    else:
        # when a scala version is specified, run test with only that scala version
        cmd += ["++ %s" % scala_version, test_cmd]  # build/sbt ... "++ 2.13.5" "project/test" ...
    if is_running_spark_tests:
        cmd += ["unidoc"]
    if coverage:
        # Merge per-subproject coverage into one report, then disable coverage.
        cmd += ["coverageAggregate", "coverageOff"]
    cmd += ["-v"]  # show java options used
    # https://docs.oracle.com/javase/7/docs/technotes/guides/vm/G1.html
    # a GC that is optimized for larger multiprocessor machines with large memory
    cmd += ["-J-XX:+UseG1GC"]
    # 4x the default heap size (set in delta/build.sbt)
    cmd += ["-J-Xmx4G"]
    run_cmd(cmd, stream_output=True)
def run_python_tests(root_dir):
    """Run the Python test suite by invoking python/run-tests.py under *root_dir*."""
    print("##### Running Python tests #####")
    python_test_script = path.join(root_dir, path.join("python", "run-tests.py"))
    # Bug fix: the original passed the path as a second print() argument after a
    # "%s" format string ( print("...%s", x) ), which printed the literal "%s"
    # followed by the path. Use %-formatting so the message renders correctly.
    print("Calling script %s" % python_test_script)
    run_cmd(["python3", python_test_script], stream_output=True)
def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, **kwargs):
    """Run *cmd* as a subprocess.

    :param cmd: command to run, either a list of argv tokens or a single string
        (a string is tokenized with shlex.split).
    :param throw_on_error: if True, raise Exception on a non-zero exit code.
    :param env: optional dict of extra environment variables, merged on top of
        a copy of os.environ.
    :param stream_output: if True, the child inherits this process's
        stdout/stderr and only the exit code is returned; otherwise output is
        captured and (exit_code, stdout, stderr) is returned with the streams
        decoded as UTF-8 strings.
    :raises Exception: when throw_on_error is set and the child exits non-zero.
    """
    if isinstance(cmd, str):
        # Removed the unused `old_cmd` alias the original kept here.
        cmd = shlex.split(cmd)
    cmd_env = os.environ.copy()
    if env:
        cmd_env.update(env)
    print("Running command: " + str(cmd))
    if stream_output:
        child = subprocess.Popen(cmd, env=cmd_env, **kwargs)
        exit_code = child.wait()
        if throw_on_error and exit_code != 0:
            raise Exception("Non-zero exitcode: %s" % (exit_code))
        return exit_code
    else:
        child = subprocess.Popen(
            cmd,
            env=cmd_env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            **kwargs)
        (stdout, stderr) = child.communicate()
        exit_code = child.wait()
        if not isinstance(stdout, str):
            # Python 3 produces bytes which needs to be converted to str
            stdout = stdout.decode("utf-8")
            stderr = stderr.decode("utf-8")
        # Bug fix: the original used `exit_code is not 0`, an identity
        # comparison with an int literal (SyntaxWarning on CPython >= 3.8 and
        # not guaranteed correct); compare values with != instead.
        if throw_on_error and exit_code != 0:
            raise Exception(
                "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" %
                (exit_code, stdout, stderr))
        return (exit_code, stdout, stderr)
def pull_or_build_docker_image(root_dir):
    """
    Prepare the docker image for running tests and return its local tag.

    A hash of the Dockerfile is used to generate the image tag/name so that
    images are reused until the Dockerfile has changed. The image is prepared
    by either pulling it from a Docker registry (if configured with environment
    variable DOCKER_REGISTRY) or by building it from scratch using the
    Dockerfile. If pulling from the registry fails, it falls back to building
    from scratch, and then attempts to push the fresh image to the registry to
    avoid image builds in the future.
    """
    dockerfile_path = os.path.join(root_dir, "Dockerfile")
    # NOTE(review): relies on the external `md5sum` binary being on PATH
    # (Linux coreutils); the content hash becomes the image tag.
    _, out, _ = run_cmd("md5sum %s" % dockerfile_path)
    dockerfile_hash = out.strip().split(" ")[0].strip()
    print("Dockerfile hash: %s" % dockerfile_hash)
    test_env_image_tag = "delta_test_env:%s" % dockerfile_hash
    print("Test env image: %s" % test_env_image_tag)
    docker_registry = os.getenv("DOCKER_REGISTRY")
    print("Docker registry set as " + str(docker_registry))

    def build_image():
        # Build the image locally from the repo's Dockerfile.
        print("---\nBuilding image %s ..." % test_env_image_tag)
        run_cmd("docker build --tag=%s %s" % (test_env_image_tag, root_dir))
        print("Built image %s" % test_env_image_tag)

    def pull_image(registry_image_tag):
        # Pull the image from the registry and re-tag it under the local name.
        # Returns True on success, False on any failure.
        try:
            print("---\nPulling image %s ..." % registry_image_tag)
            run_cmd("docker pull %s" % registry_image_tag)
            run_cmd("docker tag %s %s" % (registry_image_tag, test_env_image_tag))
            print("Pulling image %s succeeded" % registry_image_tag)
            return True
        except Exception as e:
            print("Pulling image %s failed: %s" % (registry_image_tag, repr(e)))
            return False

    def push_image(registry_image_tag):
        # Tag the local image with the registry name and push it (best-effort).
        # Returns True on success, False on any failure.
        try:
            print("---\nPushing image %s ..." % registry_image_tag)
            run_cmd("docker tag %s %s" % (test_env_image_tag, registry_image_tag))
            run_cmd("docker push %s" % registry_image_tag)
            print("Pushing image %s succeeded" % registry_image_tag)
            return True
        except Exception as e:
            print("Pushing image %s failed: %s" % (registry_image_tag, repr(e)))
            return False

    if docker_registry is not None:
        print("Attempting to use the docker registry")
        test_env_image_tag_with_registry = docker_registry + "/delta/" + test_env_image_tag
        success = pull_image(test_env_image_tag_with_registry)
        if not success:
            # Registry pull failed: build locally and push the result so
            # future runs can pull instead of rebuilding.
            build_image()
            push_image(test_env_image_tag_with_registry)
    else:
        build_image()
    return test_env_image_tag
def run_tests_in_docker(image_tag, test_group):
    """
    Run the necessary tests in a docker container made from the given image.
    It starts the container with the delta repo mounted in it, and then
    executes this script inside the container.
    """
    # Note: Pass only relevant env that the script needs to run in the docker container.
    # Do not pass docker related env variables, as we want this script to run natively
    # in the container and not attempt to recursively launch another docker container.
    envs = "-e JENKINS_URL -e SBT_1_5_5_MIRROR_JAR_URL "
    scala_version = os.getenv("SCALA_VERSION")
    if scala_version is not None:
        envs = envs + "-e SCALA_VERSION=%s " % scala_version
    test_parallelism = os.getenv("TEST_PARALLELISM_COUNT")
    if test_parallelism is not None:
        envs = envs + "-e TEST_PARALLELISM_COUNT=%s " % test_parallelism
    cwd = os.getcwd()
    test_script = os.path.basename(__file__)
    test_script_args = ""
    if test_group:
        # Forward the --group selection to the in-container invocation.
        test_script_args += " --group %s" % test_group
    # Mount the repo at the same absolute path inside the container and re-run
    # this script there. USE_DOCKER is deliberately not forwarded, so the
    # in-container run takes the native (non-docker) branch of __main__.
    test_run_cmd = "docker run --rm -v %s:%s -w %s %s %s ./%s %s" % (
        cwd, cwd, cwd, envs, image_tag, test_script, test_script_args
    )
    run_cmd(test_run_cmd, stream_output=True)
if __name__ == "__main__":
    root_dir = os.path.dirname(os.path.abspath(__file__))
    args = get_args()
    if os.getenv("USE_DOCKER") is not None:
        # Delegate the whole run to a docker container; run_tests_in_docker
        # re-invokes this script inside the container without USE_DOCKER set.
        test_env_image_tag = pull_or_build_docker_image(root_dir)
        run_tests_in_docker(test_env_image_tag, args.group)
    else:
        scala_version = os.getenv("SCALA_VERSION")
        run_sbt_tests(root_dir, args.group, args.coverage, scala_version)
        # Python tests are run only when spark group of projects are being tested.
        is_testing_spark_group = args.group is None or args.group == "spark"
        # Python tests are skipped when using Scala 2.13 as PySpark doesn't support it.
        is_testing_scala_212 = scala_version is None or scala_version.startswith("2.12")
        if is_testing_spark_group and is_testing_scala_212:
            run_python_tests(root_dir)