4 files changed: +54 −2 lines changed (file tree: examples/apple/coreml/llama)

@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+which "${PYTHON_EXECUTABLE}"
+
+pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama
+
+# Download stories llama110m artifacts
+download_stories_model_artifacts
+
+python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w
+
+popd

@@ -229,6 +229,28 @@ jobs:
         # see if we can import the module successfully
         ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"

+  test-static-llama-ane:
+    name: test-static-llama-ane
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    with:
+      runner: macos-m1-stable
+      python-version: '3.11'
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+        bash .ci/scripts/setup-conda.sh
+        eval "$(conda shell.bash hook)"
+
+        # Install requirements
+        sh install_requirements.sh
+        sh backends/apple/coreml/scripts/install_requirements.sh
+        python install_executorch.py --pybind coreml
+        sh examples/models/llama/install_requirements.sh
+
+        # Test ANE llama
+        sh .ci/scripts/test_ane_static_llama.sh
+
   test-llama-runner-macos:
     name: test-llama-runner-mac
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

@@ -203,6 +203,7 @@ def main() -> None:
             torch.ops.aten.scaled_dot_product_attention.default,
             # preserve norm op for numerical stability
             torch.ops.aten.linalg_vector_norm.default,
+            torch.ops.aten.reciprocal.default,
         ],
         compile_config=EdgeCompileConfig(
             _check_ir_validity=False,
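
Note: the hunk above adds aten.reciprocal.default to the existing preserved ops so that the explicit reciprocal introduced by the _norm rewrite (next file) survives as its own op. As a hedged sketch of how one can check which aten ops a traced graph actually contains, the TinyNorm module below is a stand-in for illustration only (not the repository's model), and torch.export is used directly rather than the example's full export flow:

import torch
from torch.export import export

# Stand-in module for illustration; it mirrors the ops involved in the
# preserved-ops list above but is not the repository's model code.
class TinyNorm(torch.nn.Module):
    def forward(self, x):
        return x * torch.reciprocal(
            torch.linalg.vector_norm(x, dim=-1, keepdim=True)
        )

# Trace and list the aten ops appearing as call_function nodes; reciprocal
# and linalg_vector_norm should both show up if they are not decomposed.
ep = export(TinyNorm(), (torch.randn(2, 8),))
targets = sorted(
    str(n.target)
    for n in ep.graph_module.graph.nodes
    if n.op == "call_function"
)
print(targets)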

@@ -134,8 +134,10 @@ def _norm(self, x):
         # We have yet to do large scale evaluations on the numeric stability of this solution, but note that
         # it appears better than what exists currently (removing FP32 casts and using FP16)
         rms_norm_eps0 = (
-            x * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
-        ) / torch.linalg.vector_norm(x, dim=-1, keepdim=True)
+            x
+            * torch.sqrt(torch.tensor(self.dim, dtype=x.dtype))
+            * torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True))
+        )
         return rms_norm_eps0

     def forward(self, x):
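
For context on the rewrite above: the old expression scales x by sqrt(dim) and divides by the L2 norm, while the new one multiplies by an explicit torch.reciprocal of that norm. The math is unchanged; only the op that reaches the exported graph differs, which is why aten.reciprocal.default is added to the preserved ops earlier in this diff. A minimal standalone equivalence check, assuming fp32 inputs and a hypothetical dim of 64:

import torch

dim = 64
x = torch.randn(4, dim)

# Old formulation: scale by sqrt(dim), then divide by the L2 norm over the last dim.
old = (
    x * torch.sqrt(torch.tensor(dim, dtype=x.dtype))
) / torch.linalg.vector_norm(x, dim=-1, keepdim=True)

# New formulation: same scale, multiplied by an explicit reciprocal of the norm.
new = (
    x
    * torch.sqrt(torch.tensor(dim, dtype=x.dtype))
    * torch.reciprocal(torch.linalg.vector_norm(x, dim=-1, keepdim=True))
)

# The two should agree up to floating-point rounding.
print(torch.allclose(old, new, atol=1e-6))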