# ML Perf Micro Shortfin #6
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Copyright 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
name: ML Perf Micro Shortfin

# Builds the SDXL shortfin harness image, starts it as a long-lived container,
# runs the MLPerf Offline performance and accuracy passes inside it via
# `docker exec`, uploads the result files, and finally removes the container
# and image.

on:
  workflow_dispatch:
  pull_request:
  schedule:
    # 03:00 UTC daily, which is 7:00 PM PST.
    - cron: '0 3 * * *'

# Values shared by every job, defined once so the names used to create the
# container/image always match the names used to exec into and remove them.
env:
  CONTAINER_NAME: 'ci_container_micro_shortfin_${{ github.run_number }}'
  IMAGE_TAG: 'mlperf_rocm_sdxl:ci_micro_shortfin_${{ github.run_number }}'
  # Host directory (relative to the workspace) that is bind-mounted into the
  # container as /mlperf/harness. Files the harness writes to its working
  # directory show up here on the host.
  HARNESS_DIR: code/stable-diffusion-xl/SDXL_inference
  # Device indices exposed to the container and passed to harness.py.
  GPU_DEVICES: '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63'

jobs:
  start_container:
    name: Start Container
    runs-on: linux-mi300-8gpu-ossci-nod-ai
    steps:
      - name: Pre Checkout MI300 Step
        run: sudo chmod -R 777 ~/actions-runner/_work

      - name: Increase system vm map
        run: sudo sysctl -w vm.max_map_count=262144

      # A plain `export` only lives for this step's shell. Writing to
      # $GITHUB_ENV makes the variable visible to the later steps of this job,
      # where `docker run -e GLIBC_TUNABLES` forwards it into the container.
      - name: Increase static TLS block limit
        run: echo "GLIBC_TUNABLES=glibc.rtld.optional_static_tls=2048" >> "$GITHUB_ENV"

      - name: Checking out this repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          ref: staging-v5.1

      # -dt lets us run in an interactive, but detached mode which keeps the
      # container alive so the actual testing job can use it using docker exec
      - name: Build and run Docker
        run: |
          echo "always" | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
          cd code/stable-diffusion-xl
          docker build --no-cache --platform linux/amd64 \
            --tag "$IMAGE_TAG" \
            --file SDXL_inference/sdxl_harness_rocm_shortfin_from_source_iree.dockerfile .
          docker run -dt --network=host --device=/dev/kfd --device=/dev/dri \
            --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
            -v /data/mlperf_sdxl/data:/data \
            -v /data/mlperf_sdxl/models:/models \
            -v "${GITHUB_WORKSPACE}/${HARNESS_DIR}:/mlperf/harness" \
            -e ROCR_VISIBLE_DEVICES="$GPU_DEVICES" \
            -e HIP_VISIBLE_DEVICES="$GPU_DEVICES" \
            -e GLIBC_TUNABLES \
            -w /mlperf/harness \
            --name "$CONTAINER_NAME" \
            "$IMAGE_TAG"

  ml_perf_test:
    needs: start_container
    name: ML Perf Harness Test
    # NOTE(review): this label differs from the one start_container runs on.
    # The docker exec calls and the host-side reads of $HARNESS_DIR below only
    # work if both labels resolve to the same machine and workspace - confirm.
    runs-on: nodai-amdgpu-mi300-x86-64-perf
    steps:
      - name: Print Hashes of Dependencies
        run: |
          docker exec "$CONTAINER_NAME" ./get_hashes.sh
          cat "$HARNESS_DIR/hashes.txt"

      - name: Download the official model
        run: |
          docker exec "$CONTAINER_NAME" ./download_model.sh

      - name: Download the official data
        run: |
          docker exec "$CONTAINER_NAME" rm -rf /data/coco
          docker exec "$CONTAINER_NAME" ./download_data.sh

      - name: Preprocess the dataset
        run: |
          docker exec "$CONTAINER_NAME" python3.11 preprocess_data.py

      # Previously generated files and binaries are removed first so the
      # precompile script starts from an empty genfiles/bin directory.
      - name: Compile the shark engines
        run: |
          docker exec "$CONTAINER_NAME" rm -rf /models/SDXL/official_pytorch/fp16/stable_diffusion_fp16/genfiles
          docker exec "$CONTAINER_NAME" rm -rf /models/SDXL/official_pytorch/fp16/stable_diffusion_fp16/bin
          docker exec "$CONTAINER_NAME" ./precompile_model_shortfin.sh \
            --gpu_batch_size 16 \
            --vae_batch_size 1 \
            --td_spec attention_and_matmul_spec_gfx942_MI325.mlir \
            --model_json sdxl_config_fp8_sched_unet_bs16.json

      - name: Run Perf
        run: |
          docker exec "$CONTAINER_NAME" python3.11 harness.py \
            --devices "$GPU_DEVICES" \
            --gpu_batch_size 16 \
            --vae_batch_size 1 \
            --cores_per_devices 2 \
            --workers_per_device 1 \
            --fibers_per_device 1 \
            --qps 16 \
            --td_spec=attention_and_matmul_spec_gfx942_MI325.mlir \
            --model_json=sdxl_config_fp8_sched_unet_bs16.json \
            --scenario Offline \
            --test_mode PerformanceOnly \
            --logfile_outdir output_offline_perf \
            --verbose True

      - name: Print Perf
        run: |
          cat "$HARNESS_DIR/output_offline_perf/mlperf_log_summary.txt"

      - name: Upload perf artifact
        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1  # v4.6.1
        with:
          name: performance_artifact
          path: ${{ env.HARNESS_DIR }}/output_offline_perf/mlperf_log_summary.txt

      # Same arguments as "Run Perf" except for the test mode and output dir.
      - name: Run Accuracy
        run: |
          docker exec "$CONTAINER_NAME" python3.11 harness.py \
            --devices "$GPU_DEVICES" \
            --gpu_batch_size 16 \
            --vae_batch_size 1 \
            --cores_per_devices 2 \
            --workers_per_device 1 \
            --fibers_per_device 1 \
            --qps 16 \
            --td_spec=attention_and_matmul_spec_gfx942_MI325.mlir \
            --model_json=sdxl_config_fp8_sched_unet_bs16.json \
            --scenario Offline \
            --test_mode AccuracyOnly \
            --logfile_outdir output_offline_acc \
            --verbose True

      - name: Setup accuracy venv and check
        run: |
          docker exec "$CONTAINER_NAME" ./setup_accuracy_env.sh
          docker exec "$CONTAINER_NAME" ./check_accuracy_scores.sh output_offline_acc/mlperf_log_accuracy.json

      - name: Print Accuracy
        run: |
          cat "$HARNESS_DIR/output_offline_acc/coco-results.json"

      - name: Upload accuracy artifact
        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1  # v4.6.1
        with:
          name: accuracy_artifact
          path: ${{ env.HARNESS_DIR }}/output_offline_acc/coco-results.json

  container_cleanup:
    needs: ml_perf_test
    name: Docker Cleanup
    # Run even when the test job failed or was skipped.
    if: always()
    runs-on: nodai-amdgpu-mi300-x86-64-perf
    steps:
      # Each command is its own step, and the later ones use `if: always()`,
      # so a failure of one (for example, the container was never started)
      # does not prevent the remaining cleanup from being attempted.
      - name: Stop container
        run: docker stop "$CONTAINER_NAME"

      - name: Remove container
        if: always()
        run: docker rm "$CONTAINER_NAME"

      - name: Remove image
        if: always()
        run: docker rmi "$IMAGE_TAG"