
Commit 397f410

LifengWang authored and iupaikov-amd committed
add JSON output support for operator benchmark (pytorch#154410)
To better support the integration of operator benchmark performance data into the OSS benchmark database for the dashboard, I've added a JSON output format that meets the required specification: https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database#output-format

Since the operator benchmark already has an `--output-json` flag for saving results to a JSON file, I added a new flag, `--output-json-for-dashboard`, for this feature. At the same time, I renamed `--output-dir` to `--output-csv` for a clearer and more intuitive name.

An example of the JSON output of the operator benchmark:

```
[
  {
    "benchmark": {
      "name": "PyTorch operator benchmark - add_M1_N1_K1_cpu",
      "mode": "inference",
      "dtype": "float32",
      "extra_info": {
        "input_config": "M: 1, N: 1, K: 1, device: cpu"
      }
    },
    "model": {
      "name": "add_M1_N1_K1_cpu",
      "type": "micro-benchmark",
      "origins": ["pytorch"]
    },
    "metric": {
      "name": "latency",
      "unit": "us",
      "benchmark_values": [2.074],
      "target_value": null
    }
  },
  {
    "benchmark": {
      "name": "PyTorch operator benchmark - add_M64_N64_K64_cpu",
      "mode": "inference",
      "dtype": "float32",
      "extra_info": {
        "input_config": "M: 64, N: 64, K: 64, device: cpu"
      }
    },
    "model": {
      "name": "add_M64_N64_K64_cpu",
      "type": "micro-benchmark",
      "origins": ["pytorch"]
    },
    "metric": {
      "name": "latency",
      "unit": "us",
      "benchmark_values": [9.973],
      "target_value": null
    }
  }
]
```

Pull Request resolved: pytorch#154410
Approved by: https://github.com/huydhn
1 parent b82d351 commit 397f410
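
As a quick illustration (a sketch, not part of the commit): the file written by `--output-json-for-dashboard` can be read back and sanity-checked as shown below. The file name is hypothetical and mirrors the CI example further down.

```
import json

# Hypothetical path: whatever was passed to --output-json-for-dashboard.
with open("operator_benchmark_eager_float32_cpu.json", encoding="utf-8") as f:
    records = json.load(f)

for rec in records:
    # Each record follows the OSS benchmark database schema shown above.
    name = rec["model"]["name"]                        # e.g. "add_M1_N1_K1_cpu"
    mode = rec["benchmark"]["mode"]                    # "inference" or "training"
    latency_us = rec["metric"]["benchmark_values"][0]  # latency in microseconds
    print(f"{name} ({mode}): {latency_us} us")
```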

File tree

3 files changed: +101 additions, -13 deletions


.ci/pytorch/test.sh

Lines changed: 2 additions & 1 deletion
```
@@ -1562,7 +1562,8 @@ test_operator_benchmark() {
 
   cd "${TEST_DIR}"/benchmarks/operator_benchmark
   $TASKSET python -m benchmark_all_test --device "$1" --tag-filter "$2" \
-    --output-dir "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv"
+    --output-csv "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" \
+    --output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.json" \
 
   pip_install pandas
   python check_perf_csv.py \
```

benchmarks/operator_benchmark/benchmark_core.py

Lines changed: 92 additions & 8 deletions
```
@@ -6,6 +6,8 @@
 import os
 import timeit
 from collections import namedtuple
+from dataclasses import asdict, dataclass
+from typing import Any, Optional
 
 import benchmark_utils
 
@@ -191,9 +193,8 @@ def __init__(self, args):
         self.use_jit = args.use_jit
         self.num_runs = args.num_runs
         self.print_per_iter = False
-        self.output_dir = args.output_dir
+        self.output_csv = args.output_csv
         self.operator_range = benchmark_utils.get_operator_range(args.operator_range)
-        self.disable_output = args.disable_output
         # 100 is the default warmup iterations
         if self.args.warmup_iterations == -1:
             self.args.warmup_iterations = 100
@@ -457,8 +458,6 @@ def _print_test_case_info(self, test_case):
         return False
 
     def _output_csv(self, filename, headers, row):
-        if self.args.disable_output is True:
-            return
         if os.path.exists(filename):
             with open(filename) as fd:
                 lines = list(csv.reader(fd)) or [[]]
@@ -475,9 +474,91 @@ def _output_csv(self, filename, headers, row):
             for line in lines:
                 writer.writerow(list(line) + ["0"] * (len(headers) - len(line)))
 
+    def _output_json(
+        self,
+        perf_list,
+        output_file,
+    ):
+        """
+        Write the result into JSON format, so that it can be uploaded to the benchmark database
+        to be displayed on OSS dashboard. The JSON format is defined at
+        https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        """
+        if not perf_list:
+            return
+
+        # Prepare headers and records for JSON output
+        records = []
+        for perf_item in perf_list:
+            # Extract data from perf_item
+            test_name = perf_item.get("test_name", "unknown")
+            input_config = perf_item.get("input_config", "")
+            run_type = perf_item.get("run")
+            latency = perf_item.get("latency", 0)
+
+            dtype = "float32"  # default
+
+            # Extract mode based on run_type
+            mode = None
+            if run_type == "Forward":
+                mode = "inference"
+            elif run_type == "Backward":
+                mode = "training"
+
+            # Create the record
+            @dataclass
+            class BenchmarkInfo:
+                name: str
+                mode: Optional[str]
+                dtype: str
+                extra_info: dict[str, Any]
+
+            @dataclass
+            class ModelInfo:
+                name: str
+                type: str
+                origins: list[str]
+
+            @dataclass
+            class MetricInfo:
+                name: str
+                unit: str
+                benchmark_values: list[float]
+                target_value: Optional[float]
+
+            @dataclass
+            class BenchmarkRecord:
+                benchmark: BenchmarkInfo
+                model: ModelInfo
+                metric: MetricInfo
+
+            record = BenchmarkRecord(
+                benchmark=BenchmarkInfo(
+                    name="PyTorch operator benchmark",
+                    mode=mode,
+                    dtype=dtype,
+                    extra_info={"input_config": input_config},
+                ),
+                model=ModelInfo(
+                    name=test_name, type="micro-benchmark", origins=["pytorch"]
+                ),
+                metric=MetricInfo(
+                    name="latency",
+                    unit="us",
+                    benchmark_values=[latency],
+                    target_value=None,
+                ),
+            )
+
+            records.append(asdict(record))
+
+        # Write all records to the output file
+        with open(output_file, "w", encoding="utf-8") as f:
+            json.dump(records, f, indent=2)
+
     def run(self):
         self._print_header()
-        output_filename = self.args.output_dir
+        output_csv_filename = self.args.output_csv
         headers = [
             "Benchmarking Framework",
             "Benchamrking Module Name",
@@ -487,7 +568,7 @@ def run(self):
             "Execution Time",
         ]
 
-        if self.args.output_json:
+        if self.args.output_json or self.args.output_json_for_dashboard:
            perf_list = []
 
         for test_metainfo in BENCHMARK_TESTER:
@@ -532,7 +613,7 @@ def run(self):
 
                 # output results to csv
                 self._output_csv(
-                    output_filename,
+                    output_csv_filename,
                     headers,
                     [
                         test_case.framework,
@@ -547,11 +628,14 @@ def run(self):
                         reported_time[0],
                     ],
                 )
-                if self.args.output_json:
+                if self.args.output_json or self.args.output_json_for_dashboard:
                     perf_list.append(
                         self._perf_result_to_dict(reported_time, test_case)
                     )
 
+        if self.args.output_json_for_dashboard:
+            self._output_json(perf_list, self.args.output_json_for_dashboard)
+
         if self.args.output_json:
             with open(self.args.output_json, "w") as f:
                 json.dump(perf_list, f)
```
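
For readers unfamiliar with the pattern `_output_json` relies on, here is a minimal, self-contained sketch (not from the commit, and trimmed to two of the four dataclasses for brevity) of how nested dataclasses are flattened with `dataclasses.asdict` and serialized with the standard `json` module:

```
import json
from dataclasses import asdict, dataclass
from typing import Any, Optional


@dataclass
class BenchmarkInfo:
    name: str
    mode: Optional[str]
    dtype: str
    extra_info: dict[str, Any]


@dataclass
class MetricInfo:
    name: str
    unit: str
    benchmark_values: list[float]
    target_value: Optional[float]


@dataclass
class Record:
    benchmark: BenchmarkInfo
    metric: MetricInfo


# asdict() recurses into nested dataclasses, producing a plain dict
# that json.dumps can serialize directly; None becomes JSON null.
record = Record(
    benchmark=BenchmarkInfo(
        name="PyTorch operator benchmark",
        mode="inference",
        dtype="float32",
        extra_info={"input_config": "M: 1, N: 1, K: 1, device: cpu"},
    ),
    metric=MetricInfo(
        name="latency", unit="us", benchmark_values=[2.074], target_value=None
    ),
)
print(json.dumps(asdict(record), indent=2))
```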

benchmarks/operator_benchmark/benchmark_runner.py

Lines changed: 7 additions & 4 deletions
```
@@ -152,13 +152,16 @@ def parse_args():
     )
 
     parser.add_argument(
-        "--output-dir",
-        help="Choose the output directory to save the logs",
+        "--output-csv",
+        "--output_csv",
+        help="CSV file path to store the results",
         default="benchmark_logs",
     )
+
     parser.add_argument(
-        "--disable-output",
-        help="Disable log output to csv file",
+        "--output-json-for-dashboard",
+        "--output_json_for_dashboard",
+        help="Save results in JSON format for display on the OSS dashboard",
         default="False",
     )
 
```
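
A side note on the dual spellings (an illustration, not part of the patch): argparse accepts several option strings for one argument, and the attribute name on the parsed namespace is derived from the first long option with dashes converted to underscores, which is why both spellings above land on the same destination. A minimal sketch:

```
import argparse

parser = argparse.ArgumentParser()
# Both spellings are accepted on the command line; the destination
# defaults to "output_json_for_dashboard" (first long option string,
# with dashes converted to underscores).
parser.add_argument(
    "--output-json-for-dashboard",
    "--output_json_for_dashboard",
    help="Save results in JSON format for display on the OSS dashboard",
    default="False",
)

args = parser.parse_args(["--output_json_for_dashboard", "results.json"])
print(args.output_json_for_dashboard)  # -> results.json
```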
