Summary
Jobs
metadata
bump-manifest
amd64
build-base
test-distribution (extra-only-distribution.sh)
test-distribution (mirror-only-distribution.sh)
test-distribution (upstream-only-distribution.sh)
test-distribution (local-patch-distribution.sh)
build-jax
build-triton
build-equinox
build-maxtext
build-levanter
build-upstream-t5x
build-upstream-pax
build-grok
build-gemma
launch-slurm-runner
jax-V100-unit-test
jax-A100-unit-test
launch-slurm-runner
pallas-V100-unit-test
pallas-A100-unit-test
test-triton
single-process-multi-device (1, 1, 2, 4)
maxtext-multinode (1, 1, 1, 1)
maxtext-multinode (1, 1, 8, 1)
maxtext-multinode (1, 1, 1, 8)
maxtext-multinode (1, 1, 4, 2)
maxtext-multinode (1, 2, 2, 2)
maxtext-multinode (1, 4, 2, 2)
test-maxtext-metrics
test-maxtext-summary
sitrep
test-maxtext-outcome
launch-slurm-runner
levanter-V100-unit-test
levanter-A100-unit-test
build-rosetta
t5x-multi-gpu (1P8G, 8)
t5x-multi-gpu (1P2G_fmha, 2, --enable-fmha 1)
t5x-multi-node (8G2N, 8, 2)
t5x-multi-node (8G2N_fmha, 8, 2, --enable-fmha 1)
test-upstream-t5x-metrics
test-upstream-t5x-summary
sitrep
test-upstream-t5x-outcome
build-rosetta
te-2GPU
te-4GPU
te-8GPU
sitrep
launch-slurm-runner
te-V100-unit-test
te-A100-unit-test
single-process-multi-device (1, 8, 1, 1)
single-process-multi-device (1, 1, 2, 4)
pax-multi-node (1DP1FSDP1TP1PP, 1, 1, 1, 1, 4)
pax-multi-node (8DP1FSDP1TP1PP, 1, 8, 1, 1, 4)
pax-multi-node (1DP8FSDP1TP1PP, 1, 1, 8, 1, 4)
pax-multi-node (2DP1FSDP1TP4PP, 4, 2, 1, 1, 4)
pax-multi-node (4DP1FSDP2TP1PP, 1, 4, 1, 2, 4)
pax-multi-node (16DP1FSDP1TP1PP, 1, 16, 1, 1, 4)
pax-multi-node (2DP1FSDP2TP4PP, 4, 2, 1, 2, 4)
pax-multi-node (LLaMA_eval, 1, 1, 8, 1, 4, true, --model-type LLaMA70BProxy --evaluate)
single-process-evaluation (1, 8, 1, 1)
test-upstream-pax-metrics
test-upstream-pax-summary
sitrep
test-upstream-pax-outcome
launch-slurm-runner
gemma-V100-unit-test
gemma-A100-unit-test
single-process-multi-device (1P1G_te-0, 1, --enable-te 0)
single-process-multi-device (1P8G_te-1, 8, --gin.train/utils.DatasetConfig.pack=False --gin.train...
multi-gpu-multi-node (2N8G-te-1, 8, 2, --gin.train/utils.DatasetConfig.pack=False --gin.train_eva...
multi-gpu-multi-node (2N2G_te-0, 2, 2, --enable-te 0)
vit-single-process-multi-device (8)
vit-multi-gpu-multi-node (1, 1)
vit-multi-gpu-multi-node (1, 2)
vit-multi-gpu-multi-node (8, 1)
vit-multi-gpu-multi-node (8, 2)
test-t5x-rosetta-metrics
test-t5x-rosetta-summary
sitrep
test-t5x-rosetta-outcome
collect-docker-tags
single-process-multi-device-te (1, 8, 1, 1)
single-process-multi-device-te (1, 1, 2, 4)
rosetta-pax-multi-node-te (1DP1FSDP1TP1PP_TE, 1, 1, 1, 1, 4)
rosetta-pax-multi-node-te (8DP1FSDP1TP1PP_TE, 1, 8, 1, 1, 4)
rosetta-pax-multi-node-te (1DP8FSDP1TP1PP_TE, 1, 1, 8, 1, 4)
rosetta-pax-multi-node-te (4DP1FSDP2TP1PP_TE, 1, 4, 1, 2, 4)
rosetta-pax-multi-node-te (16DP1FSDP1TP1PP_TE, 1, 16, 1, 1, 4)
rosetta-pax-multi-node-te (5B_fused_attn_1, 1, 1, 8, 1, 2, --model-type 5B)
rosetta-pax-multi-node-te (5B_fused_attn_0, 1, 1, 8, 1, 2, --model-type 5B --disable-fused-attn)
rosetta-pax-multi-node-te (LLaMA_eval_TE, 1, 1, 8, 1, 4, true, --model-type LLaMA70BProxy --evalu...
rosetta-pax-multi-node (1, 8, 1, 1)
rosetta-pax-multi-node (1, 4, 1, 2)
rosetta-pax-multi-node (4, 2, 1, 1)
rosetta-pax-multi-node (4, 2, 1, 2)
rosetta-pax-single-node-dropout-te (1, 8, 1, 1)
single-process-evaluation-te (1, 8, 1, 1)
test-pax-rosetta-metrics
test-pax-rosetta-summary
sitrep
test-pax-rosetta-outcome
arm64
merge-new-manifest
make-publish-configs
publish-containers (jax, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:9453788456-ja...
publish-containers (equinox, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:945378845...
publish-containers (maxtext, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:945378845...
publish-containers (levanter, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:94537884...
publish-containers (upstream-t5x, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:9453...
publish-containers (upstream-pax, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:9453...
publish-containers (t5x, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:9453788456-t5...
publish-containers (pax, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:9453788456-pa...
publish-containers (grok, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:9453788456-g...
publish-containers (gemma, mock-jax-mealkit, 500, ghcr.io/nvidia/jax-toolbox-internal:9453788456-...
publish-containers (base, mock-jax, 800, ghcr.io/nvidia/jax-toolbox-internal:9453788456-base-amd6...
publish-containers (jax, mock-jax, 1000, ghcr.io/nvidia/jax-toolbox-internal:9453788456-jax-amd64...
publish-containers (equinox, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-equino...
publish-containers (maxtext, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-maxtex...
publish-containers (levanter, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-levan...
publish-containers (upstream-t5x, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-u...
publish-containers (upstream-pax, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-u...
publish-containers (t5x, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-t5x-amd64,...
publish-containers (pax, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-pax-amd64,...
publish-containers (grok, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-grok-amd6...
publish-containers (gemma, mock-jax, 900, ghcr.io/nvidia/jax-toolbox-internal:9453788456-gemma-am...
finalize
The logs for this run have expired and are no longer available.
You can’t perform that action at this time.