ML Perf Micro Shortfin #6

Workflow file for this run

.github/workflows/mlperf_harness_sdxl_micro_shortfin.yml at 980a3df

	# Copyright 2024 Advanced Micro Devices, Inc
	#
	# Licensed under the Apache License v2.0 with LLVM Exceptions.
	# See https://llvm.org/LICENSE.txt for license information.
	# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	name: ML Perf Micro Shortfin
	# Builds the SDXL shortfin harness image, starts it as a long-lived detached
	# container, runs the MLPerf performance and accuracy passes inside it with
	# `docker exec`, and finally removes the container and image.
	on:
	  workflow_dispatch:
	  pull_request:
	  # 03:00 UTC every day, i.e. 7:00 PM PST on the previous evening.
	  schedule:
	    - cron: '0 3 * * *'
	# Shared by every job so the container name, image tag and device list cannot
	# drift apart between the start, test and cleanup stages.
	env:
	  CONTAINER_NAME: "ci_container_micro_shortfin_${{ github.run_number }}"
	  IMAGE_TAG: "mlperf_rocm_sdxl:ci_micro_shortfin_${{ github.run_number }}"
	  DEVICE_IDS: "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63"
	jobs:
	  start_container:
	    name: "Start Container"
	    # NOTE(review): this job uses a different runner label than ml_perf_test
	    # and container_cleanup below. The later jobs can only `docker exec` into
	    # this container if both labels resolve to the same host - confirm.
	    runs-on: linux-mi300-8gpu-ossci-nod-ai
	    steps:
	      - name: Pre Checkout MI300 Step
	        run: sudo chmod -R 777 ~/actions-runner/_work
	      - name: Increase system vm map
	        run: sudo sysctl -w vm.max_map_count=262144
	      # A plain `export` only lives for this step's shell. Persist the
	      # tunable through GITHUB_ENV so later steps in this job see it; the
	      # "Build and run Docker" step forwards it into the container.
	      - name: Increase static TLS block limit
	        run: echo "GLIBC_TUNABLES=glibc.rtld.optional_static_tls=2048" >> "$GITHUB_ENV"
	      - name: Checking out this repo
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
	        with:
	          ref: staging-v5.1
	      # -dt lets us run in an interactive, but detached mode which keeps the container alive
	      # so the actual testing job can use it using docker exec
	      - name: Build and run Docker
	        run: |
	          echo "always" | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
	          cd code/stable-diffusion-xl
	          docker build --no-cache --platform linux/amd64 \
	            --tag "$IMAGE_TAG" \
	            --file SDXL_inference/sdxl_harness_rocm_shortfin_from_source_iree.dockerfile .
	          # `-e GLIBC_TUNABLES` (no value) passes the host value set above
	          # through to every process later started with `docker exec`.
	          docker run -dt --network=host --device=/dev/kfd --device=/dev/dri \
	            --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
	            -v /data/mlperf_sdxl/data:/data \
	            -v /data/mlperf_sdxl/models:/models \
	            -v "$(pwd)/SDXL_inference/:/mlperf/harness" \
	            -e ROCR_VISIBLE_DEVICES="$DEVICE_IDS" \
	            -e HIP_VISIBLE_DEVICES="$DEVICE_IDS" \
	            -e GLIBC_TUNABLES \
	            -w /mlperf/harness \
	            --name "$CONTAINER_NAME" \
	            "$IMAGE_TAG"
	  ml_perf_test:
	    needs: start_container
	    name: "ML Perf Harness Test"
	    runs-on: nodai-amdgpu-mi300-x86-64-perf
	    # This job performs no checkout. Everything runs inside the container,
	    # whose working directory is /mlperf/harness, and result files are read
	    # or copied from there rather than through a host workspace path.
	    steps:
	      - name: Print Hashes of Dependencies
	        run: |
	          docker exec "$CONTAINER_NAME" ./get_hashes.sh
	          # Presumably get_hashes.sh writes hashes.txt into the harness
	          # directory (the original read it from the bind-mount source).
	          docker exec "$CONTAINER_NAME" cat hashes.txt
	      - name: "Download the official model"
	        run: |
	          docker exec "$CONTAINER_NAME" ./download_model.sh
	      - name: "Download the official data"
	        run: |
	          docker exec "$CONTAINER_NAME" rm -rf /data/coco
	          docker exec "$CONTAINER_NAME" ./download_data.sh
	      - name: Preprocess the dataset
	        run: |
	          docker exec "$CONTAINER_NAME" python3.11 preprocess_data.py
	      # Remove previously generated files first so the engines are rebuilt.
	      - name: Compile the shark engines
	        run: |
	          docker exec "$CONTAINER_NAME" rm -rf /models/SDXL/official_pytorch/fp16/stable_diffusion_fp16/genfiles
	          docker exec "$CONTAINER_NAME" rm -rf /models/SDXL/official_pytorch/fp16/stable_diffusion_fp16/bin
	          docker exec "$CONTAINER_NAME" ./precompile_model_shortfin.sh \
	            --gpu_batch_size 16 \
	            --vae_batch_size 1 \
	            --td_spec attention_and_matmul_spec_gfx942_MI325.mlir \
	            --model_json sdxl_config_fp8_sched_unet_bs16.json
	      - name: Run Perf
	        run: |
	          docker exec "$CONTAINER_NAME" python3.11 harness.py \
	            --devices "$DEVICE_IDS" \
	            --gpu_batch_size 16 \
	            --vae_batch_size 1 \
	            --cores_per_devices 2 \
	            --workers_per_device 1 \
	            --fibers_per_device 1 \
	            --qps 16 \
	            --td_spec=attention_and_matmul_spec_gfx942_MI325.mlir \
	            --model_json=sdxl_config_fp8_sched_unet_bs16.json \
	            --scenario Offline \
	            --test_mode PerformanceOnly \
	            --logfile_outdir output_offline_perf \
	            --verbose True
	      # Copy the summary into this job's workspace so the upload step below
	      # does not depend on where the bind mount lives on the host.
	      - name: Print Perf
	        run: |
	          docker cp "$CONTAINER_NAME:/mlperf/harness/output_offline_perf/mlperf_log_summary.txt" mlperf_log_summary.txt
	          cat mlperf_log_summary.txt
	      - name: "Upload perf artifact"
	        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
	        with:
	          name: performance_artifact
	          path: mlperf_log_summary.txt
	      - name: Run Accuracy
	        run: |
	          docker exec "$CONTAINER_NAME" python3.11 harness.py \
	            --devices "$DEVICE_IDS" \
	            --gpu_batch_size 16 \
	            --vae_batch_size 1 \
	            --cores_per_devices 2 \
	            --workers_per_device 1 \
	            --fibers_per_device 1 \
	            --qps 16 \
	            --td_spec=attention_and_matmul_spec_gfx942_MI325.mlir \
	            --model_json=sdxl_config_fp8_sched_unet_bs16.json \
	            --scenario Offline \
	            --test_mode AccuracyOnly \
	            --logfile_outdir output_offline_acc \
	            --verbose True
	      - name: Setup accuracy venv and check
	        run: |
	          docker exec "$CONTAINER_NAME" ./setup_accuracy_env.sh
	          docker exec "$CONTAINER_NAME" ./check_accuracy_scores.sh output_offline_acc/mlperf_log_accuracy.json
	      # Same copy-out approach as "Print Perf".
	      - name: Print Accuracy
	        run: |
	          docker cp "$CONTAINER_NAME:/mlperf/harness/output_offline_acc/coco-results.json" coco-results.json
	          cat coco-results.json
	      - name: "Upload accuracy artifact"
	        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
	        with:
	          name: accuracy_artifact
	          path: coco-results.json
	  container_cleanup:
	    needs: ml_perf_test
	    name: "Docker Cleanup"
	    # Run even when the test job failed or was skipped.
	    if: always()
	    runs-on: nodai-amdgpu-mi300-x86-64-perf
	    # Each command is its own step guarded by `if: always()`: a failing
	    # `docker stop` (for example when the container never started) must not
	    # skip removal of the container and image. Failures still fail the job.
	    steps:
	      - name: Stop container
	        if: always()
	        run: docker stop "$CONTAINER_NAME"
	      - name: Remove container
	        if: always()
	        run: docker rm "$CONTAINER_NAME"
	      - name: Remove image
	        if: always()
	        run: docker rmi "$IMAGE_TAG"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

ML Perf Micro Shortfin #6

Workflow file

ML Perf Micro Shortfin #6

Uh oh!

Jobs

Run details

Workflow file for this run