Skip to content

Commit 7aa6494

Browse files
authored
Fix ANE llama export (#8904)
* up * up * up * up * up * up * up * up * up * up * up * up * up
1 parent 09ad20a commit 7aa6494

File tree

4 files changed

+54
-2
lines changed

4 files changed

+54
-2
lines changed

.ci/scripts/test_ane_static_llama.sh

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
2+
# Copyright (c) Qualcomm Innovation Center, Inc.
3+
# All rights reserved
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
11+
12+
export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."
13+
14+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
15+
PYTHON_EXECUTABLE=python3
16+
fi
17+
18+
which "${PYTHON_EXECUTABLE}"
19+
20+
pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama
21+
22+
# Download stories110M model artifacts
23+
download_stories_model_artifacts
24+
25+
python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w
26+
27+
popd

.github/workflows/trunk.yml

+22
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,28 @@ jobs:
229229
# see if we can import the module successfully
230230
${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
231231
232+
test-static-llama-ane:
233+
name: test-static-llama-ane
234+
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
235+
with:
236+
runner: macos-m1-stable
237+
python-version: '3.11'
238+
submodules: 'true'
239+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
240+
script: |
241+
set -eux
242+
bash .ci/scripts/setup-conda.sh
243+
eval "$(conda shell.bash hook)"
244+
245+
# Install requirements
246+
sh install_requirements.sh
247+
sh backends/apple/coreml/scripts/install_requirements.sh
248+
python install_executorch.py --pybind coreml
249+
sh examples/models/llama/install_requirements.sh
250+
251+
# Test ANE llama
252+
sh .ci/scripts/test_ane_static_llama.sh
253+
232254
test-llama-runner-macos:
233255
name: test-llama-runner-mac
234256
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

examples/apple/coreml/llama/export.py

+1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def main() -> None:
203203
torch.ops.aten.scaled_dot_product_attention.default,
204204
# preserve norm op for numerical stability
205205
torch.ops.aten.linalg_vector_norm.default,
206+
torch.ops.aten.reciprocal.default,
206207
],
207208
compile_config=EdgeCompileConfig(
208209
_check_ir_validity=False,

examples/apple/coreml/llama/llama_transformer.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,10 @@ def _norm(self, x):
134134
# We have yet to do large scale evaluations on the numeric stability of this solution, but note that
135135
# it appears better than what exists currently (removing FP32 casts and using FP16)
136136
rms_norm_eps0 = (
137-
x * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
138-
) / torch.linalg.vector_norm(x, dim=-1, keepdim=True)
137+
x
138+
* torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
139+
* torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True))
140+
)
139141
return rms_norm_eps0
140142

141143
def forward(self, x):

0 commit comments

Comments
 (0)