Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit c222208

Browse files
authoredJan 8, 2024
feat mask for disable threading, make some extractor setter no-op, update doc (Tencent#5270)
1 parent 237f45f commit c222208

File tree

10 files changed

+150
-190
lines changed

10 files changed

+150
-190
lines changed
 

‎build-android.cmd

+4-22
Original file line numberDiff line numberDiff line change
@@ -2,56 +2,38 @@
22
@ECHO OFF
33
@SETLOCAL
44
@SET ANDROID_NDK=<your-ndk-root_path, such as"E:\android-ndk-r18b">
5-
@SET VULKAN_SDK=<your-vulkan-toolkit_path, such as"D:\VulkanSDK\1.1.106.0\Bin">
65

76
:: Set ninja.exe
87
:: @SET NINJA_EXE=<your-ninja-exe_path, such as"D:\android\sdk\cmake\3.10.2.4988404\bin\ninja.exe">
98

109
:: android armv7
11-
mkdir build-android-armv7
12-
pushd build-android-armv7
13-
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-21 ..
14-
:: cmake -G Ninja -DCMAKE_TOOLCHAIN_FILE="%ANDROID_NDK%/build/cmake/android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM=%NINJA_EXE% -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-21 ..
15-
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
16-
cmake --build . --target install
17-
popd
18-
19-
:: android armv7 vulkan
2010
mkdir build-android-armv7-vulkan
2111
pushd build-android-armv7-vulkan
22-
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
12+
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON ..
2313
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
2414
cmake --build . --target install
2515
popd
2616

2717
:: android aarch64
28-
mkdir build-android-aarch64
29-
pushd build-android-aarch64
30-
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 ..
31-
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
32-
cmake --build . --target install
33-
popd
34-
35-
:: android aarch64 vulkan
3618
mkdir build-android-aarch64-vulkan
3719
pushd build-android-aarch64-vulkan
38-
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
20+
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON ..
3921
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
4022
cmake --build . --target install
4123
popd
4224

4325
:: android x86
4426
mkdir build-android-x86
4527
pushd build-android-x86
46-
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 ..
28+
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON ..
4729
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
4830
cmake --build . --target install
4931
popd
5032

5133
:: android x86_64
5234
mkdir build-android-x86_64
5335
pushd build-android-x86_64
54-
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 ..
36+
cmake -G "Unix Makefiles" -DCMAKE_TOOLCHAIN_FILE=%ANDROID_NDK%/build/cmake/android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%ANDROID_NDK%/prebuilt/windows-x86_64/bin/make.exe" -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON ..
5537
cmake --build . --parallel %NUMBER_OF_PROCESSORS%
5638
cmake --build . --target install
5739
popd

‎build.sh

+12-108
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,41 @@
11
#!/usr/bin/env bash
22

3+
##### android armv7 without neon
4+
mkdir -p build-android-armv7-without-neon
5+
pushd build-android-armv7-without-neon
6+
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON ..
7+
make -j4
8+
make install
9+
popd
10+
311
##### android armv7
412
mkdir -p build-android-armv7
513
pushd build-android-armv7
6-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 ..
14+
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON ..
715
make -j4
816
make install
917
popd
1018

1119
##### android aarch64
1220
mkdir -p build-android-aarch64
1321
pushd build-android-aarch64
14-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 ..
15-
make -j4
16-
make install
17-
popd
18-
19-
##### android armv7 without neon
20-
mkdir -p build-android-armv7-without-neon
21-
pushd build-android-armv7-without-neon
22-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=OFF -DANDROID_PLATFORM=android-19 ..
22+
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON ..
2323
make -j4
2424
make install
2525
popd
2626

2727
##### android x86
2828
mkdir -p build-android-x86
2929
pushd build-android-x86
30-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 ..
30+
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-19 -DNCNN_VULKAN=ON ..
3131
make -j4
3232
make install
3333
popd
3434

3535
##### android x86_64
3636
mkdir -p build-android-x86_64
3737
pushd build-android-x86_64
38-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 ..
39-
make -j4
40-
make install
41-
popd
42-
43-
##### android armv7 vulkan
44-
mkdir -p build-android-armv7-vulkan
45-
pushd build-android-armv7-vulkan
46-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="armeabi-v7a" -DANDROID_ARM_NEON=ON -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
47-
make -j4
48-
make install
49-
popd
50-
51-
##### android aarch64 vulkan
52-
mkdir -p build-android-aarch64-vulkan
53-
pushd build-android-aarch64-vulkan
54-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
55-
make -j4
56-
make install
57-
popd
58-
59-
##### android x86 vulkan
60-
mkdir -p build-android-x86-vulkan
61-
pushd build-android-x86-vulkan
62-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
63-
make -j4
64-
make install
65-
popd
66-
67-
##### android x86_64 vulkan
68-
mkdir -p build-android-x86_64-vulkan
69-
pushd build-android-x86_64-vulkan
70-
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-24 -DNCNN_VULKAN=ON ..
38+
cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="x86_64" -DANDROID_PLATFORM=android-21 -DNCNN_VULKAN=ON ..
7139
make -j4
7240
make install
7341
popd
@@ -144,70 +112,6 @@ make -j4
144112
make install
145113
popd
146114

147-
##### ios armv7 arm64
148-
mkdir -p build-ios
149-
pushd build-ios
150-
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc.toolchain.cmake -DENABLE_BITCODE=OFF ..
151-
make -j4
152-
make install
153-
popd
154-
155-
##### ios armv7 arm64 bitcode
156-
mkdir -p build-ios-bitcode
157-
pushd build-ios-bitcode
158-
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc.toolchain.cmake -DENABLE_BITCODE=ON ..
159-
make -j4
160-
make install
161-
popd
162-
163-
##### ios simulator i386 x86_64
164-
mkdir -p build-ios-sim
165-
pushd build-ios-sim
166-
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc.toolchain.cmake -DENABLE_BITCODE=OFF ..
167-
make -j4
168-
make install
169-
popd
170-
171-
##### ios simulator i386 x86_64 bitcode
172-
mkdir -p build-ios-sim-bitcode
173-
pushd build-ios-sim-bitcode
174-
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc.toolchain.cmake -DENABLE_BITCODE=ON ..
175-
make -j4
176-
make install
177-
popd
178-
179-
##### ios arm64 vulkan
180-
mkdir -p build-ios-vulkan
181-
pushd build-ios-vulkan
182-
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc-arm64.toolchain.cmake -DENABLE_BITCODE=OFF -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON ..
183-
make -j4
184-
make install
185-
popd
186-
187-
##### ios arm64 vulkan bitcode
188-
mkdir -p build-ios-vulkan-bitcode
189-
pushd build-ios-vulkan-bitcode
190-
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iosxc-arm64.toolchain.cmake -DENABLE_BITCODE=ON -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON ..
191-
make -j4
192-
make install
193-
popd
194-
195-
##### ios simulator x86_64 vulkan
196-
mkdir -p build-ios-sim-vulkan
197-
pushd build-ios-sim-vulkan
198-
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc-x64.toolchain.cmake -DENABLE_BITCODE=OFF -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON ..
199-
make
200-
make install
201-
popd
202-
203-
##### ios simulator x86_64 vulkan bitcode
204-
mkdir -p build-ios-sim-vulkan-bitcode
205-
pushd build-ios-sim-vulkan-bitcode
206-
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/iossimxc-x64.toolchain.cmake -DENABLE_BITCODE=ON -DVulkan_INCLUDE_DIR=${VULKAN_SDK}/MoltenVK/include -DVulkan_LIBRARY=${VULKAN_SDK}/MoltenVK/iOS/MoltenVK.framework/MoltenVK -DNCNN_VULKAN=ON ..
207-
make -j4
208-
make install
209-
popd
210-
211115
##### MacOS
212116
mkdir -p build-mac
213117
pushd build-mac

‎docs/Home.md

-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ int main()
2121
net.load_model("model.bin");
2222

2323
ncnn::Extractor ex = net.create_extractor();
24-
ex.set_light_mode(true);
25-
ex.set_num_threads(4);
2624

2725
ex.input("data", in);
2826

+111
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# layer feature mask
2+
3+
Each ncnn layer allows a special parameter pair `31=X` to control specific behavior.
4+
5+
X is an unsigned integer with each bit contributing a feature mask.
6+
7+
We usually use it to configure fine-grained behaviors for certain layers to maintain accuracy, reduce memory usage or optimize performance.
8+
9+
|bit|value|mask|rationale|
10+
|---|---|---|---|
11+
|1<<0|1|no fp16 arithmetic|precision concern|
12+
|1<<1|2|no fp16 storage|precision concern|
13+
|1<<2|4|no bf16 storage|precision concern|
14+
|1<<3|8|no int8|debug dynamic quantized model|
15+
|1<<4|16|no vulkan|reduce overhead for cpu op - gpu split - cpu op|
16+
|1<<5|32|no sgemm|reduce some memory|
17+
|1<<6|64|no winograd|reduce some memory|
18+
|1<<7|128|no threading|force single thread|
19+
20+
These bits can be OR-combined into one value to control multiple behaviors simultaneously.
21+
22+
For example, `31=17` means disabling both vulkan and fp16 arithmetic.
23+
24+
## disable fp16 for certain layer to fix overflow
25+
26+
```ruby
27+
7767517
28+
3 3
29+
Input input 0 1 input0 0=22 1=22 2=32
30+
Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1
31+
Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1
32+
```
33+
34+
Typically, we use fp16 computation to improve inference speed.
35+
However, since the weight values of `conv1` are very large, fp16 accumulation may cause numerical overflow, so fp16 needs to be disabled individually for `conv1`, while other layers continue to use fp16 mode.
36+
37+
Add `31=3` to disable fp16 storage and arithmetic.
38+
39+
```ruby
40+
7767517
41+
3 3
42+
Input input 0 1 input0 0=22 1=22 2=32
43+
Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1
44+
Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1 31=3
45+
```
46+
47+
## disable vulkan for certain layer to improve performance
48+
49+
```ruby
50+
7767517
51+
5 5
52+
Input input 0 1 input0 0=22 1=22 2=32
53+
Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1
54+
SomeCPULayer c0 1 1 conv0 c0 0=32
55+
ReLU relu0 1 1 c0 relu0
56+
SomeCPULayer c1 1 1 relu0 c1 0=32
57+
```
58+
59+
Between the CPU layers, there is a simple calculation layer that supports vulkan. We can set `31=16` to force it to run on CPU. This can avoid the overhead of data upload, download and storage layout conversion between CPU and GPU. After all, CPU is fast enough for simple operations.
60+
61+
```ruby
62+
7767517
63+
5 5
64+
Input input 0 1 input0 0=22 1=22 2=32
65+
Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1
66+
SomeCPULayer c0 1 1 conv0 c0 0=32
67+
ReLU relu0 1 1 c0 relu0 31=16
68+
SomeCPULayer c1 1 1 relu0 c1 0=32
69+
```
70+
71+
## disable winograd for certain layer to reduce memory usage
72+
73+
```ruby
74+
7767517
75+
3 3
76+
Input input 0 1 input0 0=22 1=22 2=32
77+
Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1
78+
Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1
79+
```
80+
81+
The winograd technique uses more memory in order to improve convolution performance, but the improvement is not always achieved. In memory-constrained situations, or when memory IO is the bottleneck, we can disable the use of winograd on some layers in exchange for a smaller memory footprint. Add `31=64` to the Convolution layer, which forces it to use the implicit-gemm or tiled im2col-gemm implementation, reducing memory usage and sometimes improving vulkan performance.
82+
83+
```ruby
84+
7767517
85+
3 3
86+
Input input 0 1 input0 0=22 1=22 2=32
87+
Convolution conv0 1 1 input0 conv0 0=32 1=1 6=1024 9=1
88+
Convolution conv1 1 1 conv0 conv1 0=128 1=3 6=36864 9=1 31=64
89+
```
90+
91+
## disable threading for certain layer to improve performance
92+
93+
```ruby
94+
7767517
95+
4 4
96+
Input input 0 1 input0 0=22 1=22 2=3
97+
Convolution conv0 1 1 input0 conv0 0=16 1=3 6=432
98+
HardSigmoid hs 1 1 conv0 hs0
99+
Convolution conv1 1 1 hs0 conv1 0=16 1=3 6=2304
100+
```
101+
102+
The overhead of multi-thread dispatch and merging is too large for small tensors. Add `31=128` to the HardSigmoid layer, which forces it to execute in a single thread, reducing power consumption and improving performance.
103+
104+
```ruby
105+
7767517
106+
4 4
107+
Input input 0 1 input0 0=22 1=22 2=3
108+
Convolution conv0 1 1 input0 conv0 0=16 1=3 6=432
109+
HardSigmoid hs 1 1 conv0 hs0 31=128
110+
Convolution conv1 1 1 hs0 conv1 0=16 1=3 6=2304
111+
```

0 commit comments

Comments
 (0)
Please sign in to comment.