4 changes: 2 additions & 2 deletions — Applications/meson.build

@@ -55,7 +55,7 @@ if get_option('enable-transformer')
   if host_machine.system() != 'windows'
     subdir('PicoGPT/jni')
   endif
-  if (get_option('platform') != 'tizen') and (get_option('platform') != 'windows')
+  if get_option('platform') != 'windows'
     subdir('CausalLM')
   endif
 endif
10 changes: 7 additions & 3 deletions — meson.build

@@ -5,7 +5,7 @@ project('nntrainer', 'c', 'cpp',
   default_options: [
     'werror=true',
     'warning_level=1',
-    'c_std=gnu89',
+    'c_std=gnu99',
     'cpp_std=c++17',
     'buildtype=release'
   ]
@@ -86,7 +86,10 @@ warning_flags = [
   '-Wno-unused-variable',
   '-Wno-comment',
   '-Wno-ignored-attributes',
-  '-Wno-sign-compare'
+  '-Wno-sign-compare',
+  '-Wno-unused-but-set-variable',
+  '-Wno-unused-function',
+  '-Wno-array-bounds'
 ]
 
 if cxx_compiler_id == 'clang'
@@ -105,7 +108,8 @@ warning_c_flags = [
   '-Wnested-externs',
   '-Waggregate-return',
   '-Wold-style-definition',
-  '-Wdeclaration-after-statement',
+  '-Wno-declaration-after-statement',
+  '-Wno-unused-but-set-variable',
   '-Wno-error=varargs'
 ]
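The C-standard bump (gnu89 → gnu99) and the switch from -Wdeclaration-after-statement to -Wno-declaration-after-statement go together: C99 permits declarations after statements inside a block, which gnu89 plus the old warning flag (under werror=true) rejected. A minimal sketch of the construct this unlocks — a hypothetical snippet, not taken from the nntrainer sources:

#include <stdio.h>

int main(void) {
  int sum = 0;
  sum += 41;
  /* Declaring a variable after a statement is a C99 feature; under
   * -std=gnu89 with -Wdeclaration-after-statement this line warns,
   * and werror=true turns the warning into a build failure. */
  int next = sum + 1;
  printf("%d\n", next);
  return 0;
}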
4 changes: 4 additions & 0 deletions — nntrainer/tensor/cpu_backend/arm/hgemm/meson.build

@@ -3,6 +3,10 @@ hgemm_headers = [
   'hgemm_util.h',
   'hgemm_pack.h',
   'hgemm_common.h',
+  'hgemm_noTrans.h',
+  'hgemm_transA.h',
+  'hgemm_transAB.h',
+  'hgemm_transB.h'
 ]
 
 subdir('hgemm_kernel')
1 change: 1 addition & 0 deletions — nntrainer/tensor/cpu_backend/arm/neon_impl.h

@@ -17,6 +17,7 @@
 
 #include <arm_neon.h>
 #include <cmath>
+#include <climits>
 #include <limits>
 #include <neon_mathfun.h>
 #include <tensor_dim.h>
2 changes: 2 additions & 0 deletions — nntrainer/tensor/cpu_backend/arm/neon_impl_fp16.cpp

@@ -11,6 +11,8 @@
  *
  */
 
+#include <cstring>
+
 #include <hgemm.h>
 #include <matrix_transpose_neon.h>
 #include <memory>
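This include and the <climits> addition in neon_impl.h above make previously implicit dependencies explicit: <cstring> declares memcpy and <climits> defines the integer-limit macros, which some toolchains happen to pull in transitively through other headers. A minimal illustration using the C counterparts of those headers — a hypothetical example, assuming that is why the includes were added:

#include <limits.h>  /* INT_MAX; the C counterpart of <climits> */
#include <string.h>  /* memcpy; the C counterpart of <cstring> */
#include <stdio.h>

int main(void) {
  int src[2] = {7, INT_MAX};
  int dst[2];
  /* Without the explicit includes, memcpy and INT_MAX may only
   * resolve by accident of transitive inclusion. */
  memcpy(dst, src, sizeof src);
  printf("%d %d\n", dst[0], dst[1]);
  return 0;
}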
2 changes: 1 addition & 1 deletion — nntrainer/tensor/cpu_backend/arm/neon_kleidiai.cpp

@@ -441,7 +441,7 @@ void nntr_kai_gemm_qai8dxp_qsi4cxp_olp_n_parallel(
   int n_threads = 4;
   assert(n % n_threads == 0);
   size_t n_ukernel = n / n_threads;
-#pragma omp parallel for num_thread(n_threads)
+#pragma omp parallel for num_threads(n_threads)
   for (int current_thread = 0; current_thread < n_threads; ++current_thread) {
     const size_t dst_stride = n * sizeof(float);
     const size_t lhs_offset =
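This one-word fix matters because OpenMP spells the clause num_threads; with the misspelled num_thread, compilers either reject the pragma or drop the clause, leaving the loop on the default team size instead of the intended four threads. A self-contained sketch of the corrected pattern, built with -fopenmp — a hypothetical example, not the actual nntrainer kernel:

#include <omp.h>
#include <stdio.h>

int main(void) {
  int n_threads = 4;
  /* num_threads(n_threads) pins the team size, mirroring the fixed pragma. */
  #pragma omp parallel for num_threads(n_threads)
  for (int i = 0; i < n_threads; ++i) {
    printf("chunk %d runs on thread %d\n", i, omp_get_thread_num());
  }
  return 0;
}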
17 changes: 15 additions & 2 deletions — packaging/nntrainer.spec

@@ -48,7 +48,6 @@
 
 %endif # 0%{tizen_version_major}%{tizen_version_minor} >= 65
 
-%define enable_fp16 0
 ### nntrainer fp16 implementation relies on NEON, which requires armv8.2-a
 ### armv7l Tizen: do not support fp16 neon.
 ### aarch64 Tizen: uses armv8.0a. no fp16 neon.
@@ -61,6 +60,11 @@
 %define fp16_support -Denable-fp16=false
 %endif # enable_fp16
 
+%if (0%{?enable_fp16}) && (0%{?enable_transformer})
+%define transformer_support -Denable-transformer=true
+%else
+%define transformer_support -Denable-transformer=false
+%endif
 
 ## GPU flag
 ## To enable OpenCL, pass the flag to gbs build with: --define "_with_gpu 1"
@@ -461,7 +465,8 @@ meson --buildtype=plain --prefix=%{_prefix} --sysconfdir=%{_sysconfdir} \
 %{enable_reduce_tolerance} %{configure_subplugin_install_path} %{enable_debug} \
 -Dml-api-support=enabled -Denable-nnstreamer-tensor-filter=enabled \
 -Denable-nnstreamer-tensor-trainer=enabled -Denable-capi=enabled \
-%{fp16_support} %{opencl_support} build --wrap-mode=nodownload
+%{fp16_support} %{opencl_support} build --wrap-mode=nodownload \
+%{transformer_support}
 
 ninja -C build %{?_smp_mflags}
 
@@ -628,6 +633,14 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/
 %{_includedir}/nntrainer/hgemm_transA.h
 %{_includedir}/nntrainer/hgemm_transAB.h
 %{_includedir}/nntrainer/hgemm_transB.h
+%{_includedir}/nntrainer/hgemm_pack.h
+%{_includedir}/nntrainer/hgemm_padding.h
+%{_includedir}/nntrainer/hgemm_padding_a.h
+%{_includedir}/nntrainer/hgemm_padding_b.h
+%{_includedir}/nntrainer/kai_common.h
+%{_includedir}/nntrainer/mask_neon.h
+%{_includedir}/nntrainer/matrix_transpose_kernels_neon.h
+%{_includedir}/nntrainer/neon_kleidiai.h
 %{_includedir}/nntrainer/kai_lhs_quant_pack_qai8dxp_f32.h
 %{_includedir}/nntrainer/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp4x8_1x4x32_neon_dotprod.h
 %{_includedir}/nntrainer/kai_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod.h