Patch summary (text before the first "diff --git" header is informational only).

* benchmark/google.py: GoogleBenchmarkCommand accepts optional
  `benchmark_extras`, appended to the benchmark argv, and `results()` accepts
  `repetition_min_time`, forwarded as `--benchmark_min_time`.
* benchmark/runner.py: BenchmarkRunner stores `repetition_min_time`;
  CppBenchmarkRunner stores `benchmark_extras` and forwards both to
  GoogleBenchmarkCommand. The Java runner does not forward it yet (TODO).
* cli.py: new `--cpp-benchmark-extras` and `--repetition-min-time` options,
  wired into the C++ branch of `benchmark run`; docstring examples extended.
* tests/test_benchmarks.py: new tests for multi-repetition results.

Review note: `CppBenchmarkRunner.__init__` gives `benchmark_extras` a default
of None (matching GoogleBenchmarkCommand) so construction paths that this
patch does not update keep working.

diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py
index ebcc5263645f2..5d07ffab2edd8 100644
--- a/dev/archery/archery/benchmark/google.py
+++ b/dev/archery/archery/benchmark/google.py
@@ -37,9 +37,10 @@ class GoogleBenchmarkCommand(Command):
     notably `--benchmark_filter`, `--benchmark_format`, etc...
     """
 
-    def __init__(self, benchmark_bin, benchmark_filter=None):
+    def __init__(self, benchmark_bin, benchmark_filter=None, benchmark_extras=None):
         self.bin = benchmark_bin
         self.benchmark_filter = benchmark_filter
+        self.benchmark_extras = benchmark_extras or []
 
     def list_benchmarks(self):
         argv = ["--benchmark_list_tests"]
@@ -49,16 +50,19 @@ def list_benchmarks(self):
                           stderr=subprocess.PIPE)
         return str.splitlines(result.stdout.decode("utf-8"))
 
-    def results(self, repetitions=1):
+    def results(self, repetitions=1, repetition_min_time=None):
         with NamedTemporaryFile() as out:
-            argv = ["--benchmark_repetitions={}".format(repetitions),
-                    "--benchmark_out={}".format(out.name),
+            argv = [f"--benchmark_repetitions={repetitions}",
+                    f"--benchmark_out={out.name}",
                     "--benchmark_out_format=json"]
 
+            if repetition_min_time is not None:
+                argv.append(f"--benchmark_min_time={repetition_min_time:.6f}")
+
             if self.benchmark_filter:
-                argv.append(
-                    "--benchmark_filter={}".format(self.benchmark_filter)
-                )
+                argv.append(f"--benchmark_filter={self.benchmark_filter}")
+
+            argv += self.benchmark_extras
 
             self.run(*argv, check=True)
             return json.load(out)
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index 86053e6ecdc97..c12c74135e96e 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -42,10 +42,11 @@ def regex_filter(re_expr):
 
 class BenchmarkRunner:
     def __init__(self, suite_filter=None, benchmark_filter=None,
-                 repetitions=DEFAULT_REPETITIONS):
+                 repetitions=DEFAULT_REPETITIONS, repetition_min_time=None):
         self.suite_filter = suite_filter
         self.benchmark_filter = benchmark_filter
         self.repetitions = repetitions
+        self.repetition_min_time = repetition_min_time
 
     @property
     def suites(self):
@@ -107,9 +108,10 @@ def __repr__(self):
 class CppBenchmarkRunner(BenchmarkRunner):
     """ Run suites from a CMakeBuild. """
 
-    def __init__(self, build, **kwargs):
+    def __init__(self, build, benchmark_extras=None, **kwargs):
         """ Initialize a CppBenchmarkRunner. """
         self.build = build
+        self.benchmark_extras = benchmark_extras
         super().__init__(**kwargs)
 
     @staticmethod
@@ -142,14 +144,17 @@ def suites_binaries(self):
 
     def suite(self, name, suite_bin):
         """ Returns the resulting benchmarks for a given suite. """
-        suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter)
+        suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter,
+                                           self.benchmark_extras)
 
         # Ensure there will be data
         benchmark_names = suite_cmd.list_benchmarks()
         if not benchmark_names:
             return None
 
-        results = suite_cmd.results(repetitions=self.repetitions)
+        results = suite_cmd.results(
+            repetitions=self.repetitions,
+            repetition_min_time=self.repetition_min_time)
         benchmarks = GoogleBenchmark.from_json(results.get("benchmarks"))
         return BenchmarkSuite(name, benchmarks)
 
@@ -252,6 +257,7 @@ def suite(self, name):
         if not benchmark_names:
             return None
 
+        # TODO: support `repetition_min_time`
         results = suite_cmd.results(repetitions=self.repetitions)
         benchmarks = JavaMicrobenchmarkHarness.from_json(results)
         return BenchmarkSuite(name, benchmarks)
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 32b094263098c..052fe23bfc969 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -377,7 +377,10 @@ def check_language(ctx, param, value):
                      "Can be stacked. For language=java"),
         click.option("--cmake-extras", type=str, multiple=True,
                      help="Extra flags/options to pass to cmake invocation. "
-                     "Can be stacked. For language=cpp")
+                     "Can be stacked. For language=cpp"),
+        click.option("--cpp-benchmark-extras", type=str, multiple=True,
+                     help="Extra flags/options to pass to C++ benchmark executables. "
+                     "Can be stacked. For language=cpp"),
     ]
 
     cmd = java_toolchain_options(cmd)
@@ -440,12 +443,16 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
 @click.option("--repetitions", type=int, default=-1,
               help=("Number of repetitions of each benchmark. Increasing "
                     "may improve result precision. "
-                    "[default: 1 for cpp, 5 for java"))
+                    "[default: 1 for cpp, 5 for java]"))
+@click.option("--repetition-min-time", type=float, default=None,
+              help=("Minimum duration of each repetition in seconds. "
+                    "Currently only supported for language=cpp. "
+                    "[default: use runner-specific defaults]"))
 @click.pass_context
 def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
                   java_home, java_options, build_extras, benchmark_extras,
                   language, suite_filter, benchmark_filter, repetitions,
-                  **kwargs):
+                  repetition_min_time, cpp_benchmark_extras, **kwargs):
     """ Run benchmark suite.
 
     This command will run the benchmark suite for a single build. This is
@@ -468,13 +475,18 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
     \b
     archery benchmark run
 
+    \b
+    # Run the benchmarks on an existing build directory
+    \b
+    archery benchmark run /build/cpp
+
     \b
     # Run the benchmarks on current previous commit
     \b
     archery benchmark run HEAD~1
 
     \b
-    # Run the benchmarks on current previous commit
+    # Run the benchmarks on current git workspace and output results as a JSON file.
     \b
     archery benchmark run --output=run.json
     """
@@ -488,8 +500,9 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
             repetitions = repetitions if repetitions != -1 else 1
             runner_base = CppBenchmarkRunner.from_rev_or_path(
                 src, root, rev_or_path, conf,
-                repetitions=repetitions,
-                suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+                repetitions=repetitions, repetition_min_time=repetition_min_time,
+                suite_filter=suite_filter, benchmark_filter=benchmark_filter,
+                benchmark_extras=cpp_benchmark_extras)
 
         elif language == "java":
             for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
diff --git a/dev/archery/archery/tests/test_benchmarks.py b/dev/archery/archery/tests/test_benchmarks.py
index fab1e8d443219..e5af2b3b02794 100644
--- a/dev/archery/archery/tests/test_benchmarks.py
+++ b/dev/archery/archery/tests/test_benchmarks.py
@@ -81,6 +81,53 @@ def test_static_runner_from_json_not_a_regression():
     assert not comparison.regression
 
 
+def test_static_runner_from_json_multiple_values_not_a_regression():
+    # Same as above, but with multiple repetitions
+    archery_result = {
+        "suites": [
+            {
+                "name": "arrow-value-parsing-benchmark",
+                "benchmarks": [
+                    {
+                        "name": "FloatParsing",
+                        "unit": "items_per_second",
+                        "less_is_better": False,
+                        "values": [
+                            93588476.22327498,
+                            94873831.3818328,
+                            95593675.20810866,
+                            95797325.6543961,
+                            96134728.05794072
+                        ],
+                        "time_unit": "ns",
+                        "times": [
+                            10537.724568456104,
+                            10575.162068480413,
+                            10599.271208720838,
+                            10679.028059166194,
+                            10827.995119861762
+                        ],
+                        "counters": {
+                            "family_index": 0,
+                            "per_family_instance_index": 0,
+                            "run_name": "FloatParsing",
+                            "repetitions": 5,
+                            "repetition_index": 0,
+                            "threads": 1,
+                            "iterations": 10656
+                        }
+                    }
+                ]
+            }
+        ]
+    }
+
+    contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+    baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+    [comparison] = RunnerComparator(contender, baseline).comparisons
+    assert not comparison.regression
+
+
 def test_static_runner_from_json_regression():
     archery_result = {
         "suites": [
@@ -114,6 +161,58 @@ def test_static_runner_from_json_regression():
     assert comparison.regression
 
 
+def test_static_runner_from_json_multiple_values_regression():
+    # Same as above, but with multiple repetitions
+    archery_result = {
+        "suites": [
+            {
+                "name": "arrow-value-parsing-benchmark",
+                "benchmarks": [
+                    {
+                        "name": "FloatParsing",
+                        "unit": "items_per_second",
+                        "less_is_better": False,
+                        "values": [
+                            93588476.22327498,
+                            94873831.3818328,
+                            95593675.20810866,
+                            95797325.6543961,
+                            96134728.05794072
+                        ],
+                        "time_unit": "ns",
+                        "times": [
+                            10537.724568456104,
+                            10575.162068480413,
+                            10599.271208720838,
+                            10679.028059166194,
+                            10827.995119861762
+                        ],
+                        "counters": {
+                            "family_index": 0,
+                            "per_family_instance_index": 0,
+                            "run_name": "FloatParsing",
+                            "repetitions": 5,
+                            "repetition_index": 0,
+                            "threads": 1,
+                            "iterations": 10656
+                        }
+                    }
+                ]
+            }
+        ]
+    }
+
+    contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+
+    # introduce artificial regression
+    values = archery_result['suites'][0]['benchmarks'][0]['values']
+    values[:] = [v * 2 for v in values]
+    baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+
+    [comparison] = RunnerComparator(contender, baseline).comparisons
+    assert comparison.regression
+
+
 def test_benchmark_median():
     assert median([10]) == 10
     assert median([1, 2, 3]) == 2
@@ -381,3 +480,77 @@ def test_omits_aggregates():
     benchmark = GoogleBenchmark(name, [observation1, observation2])
     result = json.dumps(benchmark, cls=JsonEncoder)
     assert json.loads(result) == archery_result
+
+
+def test_multiple_observations():
+    name = "FloatParsing"
+    google_results = [
+        {
+            'cpu_time': 10627.38199641615,
+            'family_index': 0,
+            'items_per_second': 94096551.75067839,
+            'iterations': 9487,
+            'name': 'FloatParsing',
+            'per_family_instance_index': 0,
+            'real_time': 10628.84905663701,
+            'repetition_index': 0,
+            'repetitions': 3,
+            'run_name': 'FloatParsing',
+            'run_type': 'iteration',
+            'threads': 1,
+            'time_unit': 'ns'
+        },
+        {
+            'cpu_time': 10633.318014124594,
+            'family_index': 0,
+            'items_per_second': 94044022.63448404,
+            'iterations': 9487,
+            'name': 'FloatParsing',
+            'per_family_instance_index': 0,
+            'real_time': 10634.858754122948,
+            'repetition_index': 1,
+            'repetitions': 3,
+            'run_name': 'FloatParsing',
+            'run_type': 'iteration',
+            'threads': 1,
+            'time_unit': 'ns'
+        },
+        {
+            'cpu_time': 10664.315484347,
+            'family_index': 0,
+            'items_per_second': 93770669.24434038,
+            'iterations': 9487,
+            'name': 'FloatParsing',
+            'per_family_instance_index': 0,
+            'real_time': 10665.584589337563,
+            'repetition_index': 2,
+            'repetitions': 3,
+            'run_name': 'FloatParsing',
+            'run_type': 'iteration',
+            'threads': 1,
+            'time_unit': 'ns'
+        }
+    ]
+
+    archery_result = {
+        'counters': {
+            'family_index': 0,
+            'iterations': 9487,
+            'per_family_instance_index': 0,
+            'repetition_index': 2,
+            'repetitions': 3,
+            'run_name': 'FloatParsing',
+            'threads': 1
+        },
+        'less_is_better': False,
+        'name': 'FloatParsing',
+        'time_unit': 'ns',
+        'times': [10628.84905663701, 10634.858754122948, 10665.584589337563],
+        'unit': 'items_per_second',
+        'values': [93770669.24434038, 94044022.63448404, 94096551.75067839]
+    }
+
+    observations = [GoogleBenchmarkObservation(**g) for g in google_results]
+    benchmark = GoogleBenchmark(name, observations)
+    result = json.dumps(benchmark, cls=JsonEncoder)
+    assert json.loads(result) == archery_result