 import signal
 import subprocess
 from datetime import datetime, timezone
+from pathlib import Path
 from typing import List, Dict, Any, Optional
 
 import hydra
+from hydra.core.hydra_config import HydraConfig
+from hydra.types import RunMode
 from omegaconf import DictConfig, OmegaConf
 import urllib.request
 
@@ -60,6 +63,46 @@ def write_metadata(metadata: Dict[str, Any]) -> None:
         log.error("Failed to write metadata", exc_info=True)
 
 
+def upload_results_to_s3(bucket_name: str, region: str) -> None:
+    """
+    Upload benchmark results to an S3 bucket using the AWS CLI.
+    Only uploads results from multirun directories.
+    """
+
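+    # HydraConfig is only populated inside a Hydra-managed run; calling
+    # HydraConfig.get() outside of one raises an error.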
+    hydra_config = HydraConfig.get()
+
+    if hydra_config.mode == RunMode.MULTIRUN:
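+        # Hydra writes each multirun job to <sweep_dir>/<job_num>, so the parent
+        # of runtime.output_dir is the sweep root, e.g. multirun/<date>/<time>.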
+        source_path = Path(hydra_config.runtime.output_dir).parent
+
+        assert len(source_path.parts) >= 2, "Source path must have at least 2 parts for date/time extraction"
+        date_part, time_part = source_path.parts[-2:]
+
+        s3_target_path = f"s3://{bucket_name}/results/{date_part}/{time_part}"
+
+        aws_cmd = [
+            "aws",
+            "s3",
+            "sync",
+            str(source_path),
+            s3_target_path,
+            "--region",
+            region,
+        ]
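+        # `aws s3 sync` only copies new or changed files, so re-running the
+        # upload for the same sweep directory is effectively idempotent.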
+        result = subprocess.run(aws_cmd, capture_output=True, text=True)
+        if result.returncode == 0:
+            log.info("Successfully uploaded benchmark results to S3")
+        else:
+            log.error(f"S3 upload failed: {result.stderr.strip()}")
+    else:
+        log.info("Skipping benchmark upload for non-multirun execution")
+
+
 class ResourceMonitoring:
     def __init__(self, target_pid, with_bwm: bool, with_perf_stat: bool):
         """Resource monitoring setup.
@@ -200,6 +237,17 @@ def run_experiment(cfg: DictConfig) -> None:
 
         # Mark success if we get here without exceptions
         metadata["success"] = True
+
+        result_bucket_name = common_config.get("s3_result_bucket")
+
+        # If region is not specified, default to 'us-east-1', as that is the only region where we can be relatively assured that Trainium instances are available
+        region = common_config.get("region", "us-east-1")
+        if result_bucket_name:
+            log.info(f"Uploading benchmark results to S3 bucket '{result_bucket_name}'")
+            upload_results_to_s3(result_bucket_name, region)
+        else:
+            log.info("No results bucket specified (s3_result_bucket), skipping upload")
+
     except Exception:
         log.error("Benchmark execution failed:", exc_info=True)
         raise
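For reference, a minimal sketch of the path handling in `upload_results_to_s3`, assuming Hydra's default multirun output layout; the sweep path and bucket name below are hypothetical:

```python
from pathlib import Path

# Per-job output dir as Hydra lays it out for a multirun (hypothetical example).
output_dir = Path("multirun/2024-05-01/13-45-07/0")

source_path = output_dir.parent               # sweep root: multirun/2024-05-01/13-45-07
date_part, time_part = source_path.parts[-2:]

print(f"s3://example-bucket/results/{date_part}/{time_part}")
# -> s3://example-bucket/results/2024-05-01/13-45-07
```

Shelling out to `aws s3 sync` rather than using an SDK such as boto3 presumably keeps the benchmark free of an extra Python dependency while still getting recursive, incremental uploads.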