Skip to content

Commit f388177

Browse files
committed
Update
[ghstack-poisoned]
2 parents 70a7096 + cca6917 commit f388177

File tree

72 files changed

+3716
-1415
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+3716
-1415
lines changed

.ci/scripts/test_model.sh

+8
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ test_model() {
100100
rm "./${MODEL_NAME}.pte"
101101
return # Skip running with portable executor runner since portable doesn't support Qwen's biased linears.
102102
fi
103+
if [[ "${MODEL_NAME}" == "phi4_mini" ]]; then
104+
# Install requirements for export_llama
105+
bash examples/models/llama/install_requirements.sh
106+
# Test export_llama script: python3 -m examples.models.llama.export_llama.
107+
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
108+
run_portable_executor_runner
109+
rm "./${MODEL_NAME}.pte"
110+
fi
103111

104112
# Export a basic .pte and run the model.
105113
"${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"

backends/arm/scripts/build_executorch_runner.sh

+20-5
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@ toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmak
1414
pte_file=""
1515
target="ethos-u55-128"
1616
build_type="Release"
17-
system_config=""
1817
bundleio=false
18+
system_config=""
19+
memory_mode=""
1920
build_with_etdump=false
2021
extra_build_flags=""
2122
output_folder_set=false
@@ -32,9 +33,12 @@ help() {
3233
echo " --pte=<PTE_FILE> pte file (generated by the aot_arm_compiler from the model) to include in the elf"
3334
echo " --target=<TARGET> Target to build and run for Default: ${target}"
3435
echo " --build_type=<TYPE> Build with Release, Debug or RelWithDebInfo, default is ${build_type}"
35-
echo " --system_config=<CONFIG> System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets."
36-
echo " NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt."
3736
echo " --bundleio Support both pte and Bundle IO bpte using Devtools BundleIO with Input/RefOutput included"
37+
echo " --system_config=<CONFIG> System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets."
38+
echo " NOTE: If given, this option must match the given target. This option along with the memory_mode sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt."
39+
echo " --memory_mode=<CONFIG> Vela memory mode, used for setting the Timing Adapter parameters of the Corstone platforms."
40+
echo " Valid values are Shared_Sram(for Ethos-U55, Ethos-U65, Ethos-U85), Sram_Only(for Ethos-U55, Ethos-U65, Ethos-U85) or Dedicated_Sram(for Ethos-U65, Ethos-U85)."
41+
echo " Default: Shared_Sram for the Ethos-U55 and Sram_Only for the Ethos-U85"
3842
echo " --etdump Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log"
3943
echo " --extra_build_flags=<FLAGS> Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none "
4044
echo " --output=<FOLDER> Output folder Default: <MODEL>/<MODEL>_<TARGET INFO>.pte"
@@ -49,8 +53,9 @@ for arg in "$@"; do
4953
--pte=*) pte_file="${arg#*=}";;
5054
--target=*) target="${arg#*=}";;
5155
--build_type=*) build_type="${arg#*=}";;
52-
--system_config=*) system_config="${arg#*=}";;
5356
--bundleio) bundleio=true ;;
57+
--system_config=*) system_config="${arg#*=}";;
58+
--memory_mode=*) memory_mode="${arg#*=}";;
5459
--etdump) build_with_etdump=true ;;
5560
--extra_build_flags=*) extra_build_flags="${arg#*=}";;
5661
--output=*) output_folder="${arg#*=}" ; output_folder_set=true ;;
@@ -83,6 +88,15 @@ then
8388
fi
8489
fi
8590

91+
if [[ ${memory_mode} == "" ]]
92+
then
93+
memory_mode="Shared_Sram"
94+
if [[ ${target} =~ "ethos-u85" ]]
95+
then
96+
memory_mode="Sram_Only"
97+
fi
98+
fi
99+
86100
output_folder=$(realpath ${output_folder})
87101

88102
if [[ ${target} == *"ethos-u55"* ]]; then
@@ -91,7 +105,7 @@ else
91105
target_cpu=cortex-m85
92106
fi
93107
echo "--------------------------------------------------------------------------------"
94-
echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} ${extra_build_flags} to '${output_folder}/cmake-out'"
108+
echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} ${memory_mode} ${extra_build_flags} to '${output_folder}/cmake-out'"
95109
echo "--------------------------------------------------------------------------------"
96110

97111
cd ${et_root_dir}/examples/arm/executor_runner
@@ -120,6 +134,7 @@ cmake \
120134
${build_with_etdump_flags} \
121135
-DPYTHON_EXECUTABLE=$(which python3) \
122136
-DSYSTEM_CONFIG=${system_config} \
137+
-DMEMORY_MODE=${memory_mode} \
123138
${extra_build_flags} \
124139
-B ${output_folder}/cmake-out
125140

backends/cadence/aot/functions_hifi.yaml

+13-3
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
- op: _softmax.out
2121
kernels:
2222
- arg_meta: null
23-
kernel_name: cadence::impl::HiFi::softmax_out
23+
kernel_name: cadence::impl::HiFi::_softmax_out
2424

2525
- op: atan2.out
2626
kernels:
@@ -100,7 +100,7 @@
100100
- op: mean.out
101101
kernels:
102102
- arg_meta: null
103-
kernel_name: cadence::impl::HiFi::mean_dim_out
103+
kernel_name: cadence::impl::HiFi::mean_out
104104

105105
- op: minimum.out
106106
kernels:
@@ -175,7 +175,7 @@
175175
- op: where.self_out
176176
kernels:
177177
- arg_meta: null
178-
kernel_name: cadence::impl::HiFi::where_out
178+
kernel_name: cadence::impl::HiFi::where_self_out
179179

180180
# custom ops
181181
- func: cadence::quantize_per_tensor.out(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype, *, Tensor(a!) out) -> Tensor(a!)
@@ -189,6 +189,11 @@
189189
kernels:
190190
- arg_meta: null
191191
kernel_name: cadence::impl::HiFi::dequantize_per_tensor_out
192+
193+
- func: cadence::quantized_conv.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, bool channel_last=False, *, Tensor(a!) out) -> Tensor(a!)
194+
kernels:
195+
- arg_meta: null
196+
kernel_name: cadence::impl::HiFi::quantized_conv_out
192197

193198
- func: cadence::quantized_layer_norm.out(Tensor input, Tensor in_scale, Tensor in_zero_point, int[] normalized_shape, Tensor weight, Tensor bias, float eps, float output_scale, int output_zero_point, *, Tensor(a!) out) -> Tensor(a!)
194199
kernels:
@@ -209,6 +214,11 @@
209214
- arg_meta: null
210215
kernel_name: cadence::impl::HiFi::quantized_linear_per_tensor_out
211216

217+
- func: cadence::quantized_relu_per_tensor.out(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
218+
kernels:
219+
- arg_meta: null
220+
kernel_name: cadence::impl::HiFi::quantized_relu_per_tensor_out
221+
212222
- func: cadence::quantized_relu.out(Tensor X, Tensor X_zero_point, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)
213223
kernels:
214224
- arg_meta: null

backends/cadence/hifi/operators/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ target_include_directories(
7878
# Custom ops that are needed to run the test model.
7979
add_library(
8080
custom_ops "op_quantized_linear_out.cpp" "op_quantized_layer_norm.cpp"
81-
"op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp" "op_quantized_fully_connected_out"
81+
"op_quantize_per_tensor.cpp" "op_quantized_relu_out.cpp" "op_dequantize_per_tensor.cpp"
82+
"op_quantized_conv_out.cpp" "op_quantized_fully_connected_out"
8283
)
8384
target_include_directories(
8485
custom_ops PUBLIC ${ROOT_DIR}/.. ${CMAKE_BINARY_DIR}

0 commit comments

Comments
 (0)