# ML Perf Micro Shortfin nogil #7
# NOTE: the file viewer this was captured from warned that the file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Copyright 2024 Advanced Micro Devices, Inc
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Builds the SDXL MLPerf harness image (no-GIL Shortfin variant), starts it as a
# detached container, runs the Offline performance and accuracy scenarios inside
# it with `docker exec`, uploads the result files, and finally removes the
# container and image.
name: ML Perf Micro Shortfin nogil

on:
  workflow_dispatch:
  pull_request:
  schedule:
    # 04:00 UTC daily, i.e. 8:00 PM PST.
    - cron: '0 4 * * *'

# Values shared by every job, defined once so the three jobs cannot drift apart.
env:
  # Container and image are keyed by run number so each workflow run gets its own.
  CONTAINER_NAME: ci_container_micro_shortfin_nogil_${{ github.run_number }}
  IMAGE_TAG: mlperf_rocm_sdxl:ci_micro_shortfin_nogil_${{ github.run_number }}
  # Device ids exposed to the container and handed to the harness.
  # NOTE(review): 64 ids on an "8gpu" runner presumably means the GPUs are
  # partitioned into 8 logical devices each - confirm against the runner setup.
  DEVICE_IDS: "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63"

jobs:
  # Builds the harness image and leaves a detached container running for the
  # test job to use.
  start_container:
    name: Start Container
    runs-on: linux-mi300-8gpu-ossci-nod-ai
    steps:
      - name: Increase system vm map
        run: sudo sysctl -w vm.max_map_count=262144

      # A plain `export` only lives as long as this step's shell, so the value
      # is written to GITHUB_ENV instead; that makes it visible to the later
      # steps of this job, where `docker run -e GLIBC_TUNABLES` forwards it.
      - name: Increase static TLS block limit
        run: echo "GLIBC_TUNABLES=glibc.rtld.optional_static_tls=2048" >> "$GITHUB_ENV"

      - name: Checking out this repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: sdxl-mlperf-v5.1-nogil

      # -dt lets us run in an interactive, but detached mode which keeps the
      # container alive so the actual testing job can use it using docker exec.
      - name: Build and run Docker
        run: |
          cd code/stable-diffusion-xl
          docker build --no-cache --platform linux/amd64 \
            --tag "${IMAGE_TAG}" \
            --file SDXL_inference/sdxl_harness_rocm_shortfin_no_gil.dockerfile .
          docker run -dt --network=host --device=/dev/kfd --device=/dev/dri \
            --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
            -v /data/mlperf_sdxl/data:/data \
            -v /data/mlperf_sdxl/models:/models \
            -v "$(pwd)/SDXL_inference/:/mlperf/harness" \
            -e ROCR_VISIBLE_DEVICES="${DEVICE_IDS}" \
            -e HIP_VISIBLE_DEVICES="${DEVICE_IDS}" \
            -e GLIBC_TUNABLES \
            -w /mlperf/harness \
            --name "${CONTAINER_NAME}" \
            "${IMAGE_TAG}"

  # Runs the harness inside the container started above.
  # NOTE(review): this relies on landing on the same machine (and workspace) as
  # start_container; nothing in this file enforces that - confirm the runner
  # label maps to a single host.
  ml_perf_test:
    needs: start_container
    name: ML Perf Harness Test
    runs-on: linux-mi300-8gpu-ossci-nod-ai
    steps:
      # NOTE(review): the SDXL_inference/... paths read on the host in this job
      # are relative to the workspace root, while the build step mounts
      # code/stable-diffusion-xl/SDXL_inference - verify the paths line up.
      - name: Print Hashes of Dependencies
        run: |
          docker exec "${CONTAINER_NAME}" ./get_hashes.sh
          cat SDXL_inference/hashes.txt

      # Lists the artifacts instead of `cat`-ing them, so the step still fails
      # when nothing matches but does not dump file contents into the log.
      # NOTE(review): /models is the mount point used inside the container; on
      # the host the same data is mounted from /data/mlperf_sdxl/models -
      # confirm /models also exists on the runner.
      - name: Check if artifacts exist
        run: |
          ls -l /models/SDXL/official_pytorch/fp16/stable_diffusion_fp16/genfiles/*bs16*

      # PYTHON_GIL=0 has to be passed with `-e`: anything placed after the
      # container name is treated by `docker exec` as the command to run.
      - name: Run Perf
        run: |
          docker exec -e PYTHON_GIL=0 "${CONTAINER_NAME}" python harness.py \
            --devices "${DEVICE_IDS}" \
            --gpu_batch_size 16 \
            --vae_batch_size 1 \
            --cores_per_devices 2 \
            --workers_per_device 1 \
            --fibers_per_device 1 \
            --qps 16 \
            --td_spec=attention_and_matmul_spec_gfx942_MI325.mlir \
            --model_json=sdxl_config_fp8_sched_unet_bs16.json \
            --scenario Offline \
            --test_mode PerformanceOnly \
            --logfile_outdir output_offline_perf_nogil \
            --verbose True

      - name: Print Perf
        run: |
          cat SDXL_inference/output_offline_perf_nogil/mlperf_log_summary.txt

      - name: Upload perf artifact
        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
        with:
          name: performance_artifact
          path: SDXL_inference/output_offline_perf_nogil/mlperf_log_summary.txt

      # Same invocation as "Run Perf" except for the test mode and output dir.
      - name: Run Accuracy
        run: |
          docker exec -e PYTHON_GIL=0 "${CONTAINER_NAME}" python harness.py \
            --devices "${DEVICE_IDS}" \
            --gpu_batch_size 16 \
            --vae_batch_size 1 \
            --cores_per_devices 2 \
            --workers_per_device 1 \
            --fibers_per_device 1 \
            --qps 16 \
            --td_spec=attention_and_matmul_spec_gfx942_MI325.mlir \
            --model_json=sdxl_config_fp8_sched_unet_bs16.json \
            --scenario Offline \
            --test_mode AccuracyOnly \
            --logfile_outdir output_offline_acc_nogil \
            --verbose True

      - name: Setup accuracy venv and check
        run: |
          docker exec "${CONTAINER_NAME}" ./setup_accuracy_env.sh
          docker exec "${CONTAINER_NAME}" ./check_accuracy_scores.sh output_offline_acc_nogil/mlperf_log_accuracy.json

      - name: Print Accuracy
        run: |
          cat SDXL_inference/output_offline_acc_nogil/coco-results.json

      - name: Upload accuracy artifact
        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
        with:
          name: accuracy_artifact
          path: SDXL_inference/output_offline_acc_nogil/coco-results.json

  # Always runs, even when the test job failed, so the container and the
  # per-run image do not pile up on the runner.
  container_cleanup:
    needs: ml_perf_test
    name: Docker Cleanup
    if: always()
    runs-on: linux-mi300-8gpu-ossci-nod-ai
    steps:
      # Every command is attempted even if an earlier one fails (for example
      # the container never started), and the first failure is still reported
      # as the step's exit status.
      - name: Cleanup Docker
        run: |
          status=0
          docker stop "${CONTAINER_NAME}" || status=$?
          docker rm "${CONTAINER_NAME}" || status=$?
          docker rmi "${IMAGE_TAG}" || status=$?
          exit "${status}"