286 changes: 210 additions & 76 deletions test/include/nntrainer_test_util.h
@@ -103,92 +103,92 @@ class ScopedIni {
nntrainer::IniWrapper ini;
};

#define GEN_TEST_INPUT_NHWC(input, eqation_i_j_k_l) \
Contributor:
How about keeping the whitespace at the end of each line? And remove file diffs?

Contributor Author:
I believe the previous version had incorrect formatting, likely because it was formatted with an outdated version.
(The last time this file was edited was a year ago!)

do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < height; ++j) { \
for (int k = 0; k < width; ++k) { \
for (int l = 0; l < channel; ++l) { \
float val = (eqation_i_j_k_l); \
input.setValue(i, l, j, k, val); \
} \
} \
} \
} \
#define GEN_TEST_INPUT_NHWC(input, eqation_i_j_k_l) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < height; ++j) { \
for (int k = 0; k < width; ++k) { \
for (int l = 0; l < channel; ++l) { \
float val = (eqation_i_j_k_l); \
input.setValue(i, l, j, k, val); \
} \
} \
} \
} \
} while (0)

#define GEN_TEST_INPUT(input, eqation_i_j_k_l) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height; ++k) { \
for (int l = 0; l < width; ++l) { \
float val = (eqation_i_j_k_l); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
#define GEN_TEST_INPUT(input, eqation_i_j_k_l) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height; ++k) { \
for (int l = 0; l < width; ++l) { \
float val = (eqation_i_j_k_l); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
} while (0)

#define GEN_TEST_INPUT_RAND(input, min, max) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height; ++k) { \
for (int l = 0; l < width; ++l) { \
std::uniform_real_distribution<double> dist(min, max); \
std::default_random_engine gen((k + 1) * (l + 42)); \
float val = dist(gen); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
#define GEN_TEST_INPUT_RAND(input, min, max) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height; ++k) { \
for (int l = 0; l < width; ++l) { \
std::uniform_real_distribution<double> dist(min, max); \
std::default_random_engine gen((k + 1) * (l + 42)); \
float val = dist(gen); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
} while (0)

#define GEN_TEST_INPUT_RAND_B(input, min, max) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height_b; ++k) { \
for (int l = 0; l < width_b; ++l) { \
std::uniform_real_distribution<double> dist(min, max); \
std::default_random_engine gen((k + 42) * (l + 1)); \
float val = dist(gen); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
#define GEN_TEST_INPUT_RAND_B(input, min, max) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height_b; ++k) { \
for (int l = 0; l < width_b; ++l) { \
std::uniform_real_distribution<double> dist(min, max); \
std::default_random_engine gen((k + 42) * (l + 1)); \
float val = dist(gen); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
} while (0)

#define GEN_TEST_INPUT_B(input, equation_i_j_k_l) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height_b; ++k) { \
for (int l = 0; l < width_b; ++l) { \
float val = (equation_i_j_k_l); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
#define GEN_TEST_INPUT_B(input, equation_i_j_k_l) \
do { \
for (int i = 0; i < batch; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height_b; ++k) { \
for (int l = 0; l < width_b; ++l) { \
float val = (equation_i_j_k_l); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
} while (0)

#define GEN_TEST_INPUT_C(input, equation_i_j_k_l) \
do { \
for (int i = 0; i < batch_b; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height; ++k) { \
for (int l = 0; l < width; ++l) { \
float val = (equation_i_j_k_l); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
#define GEN_TEST_INPUT_C(input, equation_i_j_k_l) \
do { \
for (int i = 0; i < batch_b; ++i) { \
for (int j = 0; j < channel; ++j) { \
for (int k = 0; k < height; ++k) { \
for (int l = 0; l < width; ++l) { \
float val = (equation_i_j_k_l); \
input.setValue(i, j, k, l, val); \
} \
} \
} \
} \
} while (0)
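For reference, a minimal usage sketch of these macros (assuming the enclosing test declares int batch, channel, height, and width, and that input is an nntrainer::Tensor, as the existing tensor unit tests do):

// Hypothetical test snippet: the dimensions and the tensor are assumed to be
// set up by the test itself.
int batch = 1, channel = 2, height = 3, width = 4;
nntrainer::Tensor input(batch, channel, height, width);
// Fill every element with a value derived from its (i, j, k, l) indices.
GEN_TEST_INPUT(input, i * (channel * height * width) + j * (height * width) +
                        k * width + l + 1);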

/**
@@ -435,5 +435,139 @@ struct static_cast_func {
}
};

#define EXPECT_IN_RANGE(VAL, MIN, MAX) \
EXPECT_GE((VAL), (MIN)); \
EXPECT_LE((VAL), (MAX))
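A quick sketch of how the range check reads inside a GoogleTest body (the value name is only illustrative):

// cosine_sim is a hypothetical result computed by the test under check.
float cosine_sim = 0.995f;
EXPECT_IN_RANGE(cosine_sim, 0.99f, 1.0f);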

#ifdef ENABLE_OPENCL
#include <cl_context.h>
#include <engine.h>

inline void *allocateSVM(size_t size_bytes) {
auto *blas_cc = static_cast<nntrainer::ClContext *>(
nntrainer::Engine::Global().getRegisteredContext("gpu"));

void *ptr = blas_cc->context_inst_.createSVMRegion(size_bytes);

if (ptr == nullptr) {
throw std::runtime_error(
"Failed to allocated SVM for the OpenCL BLAS unit test.");
}

return ptr;
}

inline void freeSVM(void *ptr) {
auto *blas_cc = static_cast<nntrainer::ClContext *>(
nntrainer::Engine::Global().getRegisteredContext("gpu"));

blas_cc->context_inst_.releaseSVMRegion(ptr);
ptr = nullptr;
}
#endif
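A minimal sketch of how these helpers would be used in an OpenCL-enabled test (the buffer size and element type are illustrative):

// Allocate shared virtual memory for a test buffer, use it, then release it.
float *buf = static_cast<float *>(allocateSVM(1024 * sizeof(float)));
// ... run the OpenCL kernel under test against buf ...
freeSVM(buf);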

/**
* @brief Helper function to generate random data
*
* @tparam T data type
* @tparam random_init True if want random
* @param size data length
* @param min_val minimum value
* @param max_val maximum value
* @return std::vector<T> random vector
*/
template <typename T, bool random_init = false>
static inline std::vector<T>
generate_random_vector(size_t size, float min_val = -1.F, float max_val = 1.F) {
std::random_device rd;
auto init_val = random_init ? rd() : 42;
std::mt19937 gen(init_val);
std::uniform_real_distribution<float> dist(min_val, max_val);
std::vector<T> vec(size);
for (auto &val : vec) {
val = static_cast<T>(dist(gen));
}
return vec;
}

static inline std::vector<float> generate_vector(const size_t size,
float min_val, float max_val) {
const float step = (max_val - min_val) / (float)size;
float current_value = min_val;
std::vector<float> vec(size, 0.0f);

for (int i = 0; i < vec.size(); ++i) {
vec[i] = current_value;
current_value += step;
}

return vec;
}

static inline void printMatrixF(const char *name, float *data, int Y, int X) {
printf("%s :\n", name);
for (int y = 0; y < Y; y++) {
printf("[");
for (int x = 0; x < X; x++) {
std::cout << data[y * X + x] << " ";
}
printf("]\n");
}
}

static inline void printMatrixI(const char *name, float *data, int Y, int X) {
printf("%s :\n", name);
for (int y = 0; y < Y; y++) {
// printf("[");
for (int x = 0; x < X; x++) {
if (x % 10 == 0) {
printf("| ");
}
std::cout << (int)(0.5f + data[y * X + x]) << " ";
}
printf("\n");
}
}

static inline std::vector<float> generate_01_vector(const size_t size,
const float ones_ratio) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dist(0.0f, (float)size);
if (ones_ratio >= 1.0) {
std::vector<float> vec(size, 1.0f);
return vec;
} else {
std::vector<float> vec(size, 0.0f);
size_t ones_cnt = (size_t)(size * ones_ratio);
for (size_t i = 0; i < ones_cnt; i++) {
int pos = static_cast<int>(dist(gen));
vec[pos] = 1.0f;
}
return vec;
}
}

/**
* @brief Helper function to print data
*
* @param data
* @param size
* @param count
*/
template <typename T = float>
static void debug_print_beg_end(const T *const data, const unsigned int size,
const uint32_t count = 5) {
std::cout << "[";
for (unsigned int i = 0; i < count; ++i) {
std::cout << std::fixed << std::setprecision(3) << data[i] << " ";
}
std::cout << "][";
for (unsigned int i = size - count; i < size; ++i) {
std::cout << std::fixed << std::setprecision(3) << data[i] << " ";
}
std::cout << "]" << std::endl;
};
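A small sketch tying the data helpers above together (the size is illustrative; the default seed of 42 keeps the data reproducible unless random_init is set):

// Generate 64 reproducible random floats in [-1, 1] and print the first
// and last five values.
std::vector<float> in = generate_random_vector<float>(64);
debug_print_beg_end(in.data(), in.size());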

#endif /* __cplusplus */
#endif /* __NNTRAINER_TEST_UTIL_H__ */
48 changes: 46 additions & 2 deletions test/jni/Android.mk
@@ -658,13 +658,57 @@ include $(BUILD_EXECUTABLE)
ifeq ($(MESON_ENABLE_OPENCL), 1)
include $(CLEAR_VARS)

LOCAL_MODULE := unittest_blas_kernels_cl
LOCAL_MODULE := unittest_opencl_kernels_blas
LOCAL_CFLAGS := -Igoogletest/include -I../include -I../unittest/layers -I../../nntrainer/layers/loss -pthread -fexceptions -fopenmp -static-openmp -DMIN_CPP_VERSION=201703L -DNNTR_NUM_THREADS=1 -D__LOGGING__=1 -DENABLE_TEST=1 -DREDUCE_TOLERANCE=1 -march=armv8.2-a+fp16+dotprod+i8mm -O3 -frtti -DNDK_BUILD=1 -DENABLE_FP16=1 -DENABLE_OPENCL=1
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp

LOCAL_SRC_FILES := \
../unittest/unittest_blas_kernels_cl.cpp
../unittest/unittest_opencl_kernels_blas.cpp

LOCAL_C_INCLUDES += $(NNTRAINER_INCLUDES)

LOCAL_SHARED_LIBRARIES := nntrainer ccapi-nntrainer
LOCAL_STATIC_LIBRARIES := googletest_main test_util

ifeq ($(MESON_ENABLE_OPENCL), 1)
LOCAL_SHARED_LIBRARIES += opencl
LOCAL_STATIC_LIBRARIES += clblast
endif

include $(BUILD_EXECUTABLE)

include $(CLEAR_VARS)

LOCAL_MODULE := unittest_opencl_kernels_int4
LOCAL_CFLAGS := -Igoogletest/include -I../include -I../unittest/layers -I../../nntrainer/layers/loss -pthread -fexceptions -fopenmp -static-openmp -DMIN_CPP_VERSION=201703L -DNNTR_NUM_THREADS=1 -D__LOGGING__=1 -DENABLE_TEST=1 -DREDUCE_TOLERANCE=1 -march=armv8.2-a+fp16+dotprod+i8mm -O3 -frtti -DNDK_BUILD=1 -DENABLE_FP16=1 -DENABLE_OPENCL=1
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp

LOCAL_SRC_FILES := \
../unittest/unittest_opencl_kernels_int4.cpp

LOCAL_C_INCLUDES += $(NNTRAINER_INCLUDES)

LOCAL_SHARED_LIBRARIES := nntrainer ccapi-nntrainer
LOCAL_STATIC_LIBRARIES := googletest_main test_util

ifeq ($(MESON_ENABLE_OPENCL), 1)
LOCAL_SHARED_LIBRARIES += opencl
LOCAL_STATIC_LIBRARIES += clblast
endif

include $(BUILD_EXECUTABLE)

include $(CLEAR_VARS)

LOCAL_MODULE := unittest_opencl_kernels_qk_k
LOCAL_CFLAGS := -Igoogletest/include -I../include -I../unittest/layers -I../../nntrainer/layers/loss -pthread -fexceptions -fopenmp -static-openmp -DMIN_CPP_VERSION=201703L -DNNTR_NUM_THREADS=1 -D__LOGGING__=1 -DENABLE_TEST=1 -DREDUCE_TOLERANCE=1 -march=armv8.2-a+fp16+dotprod+i8mm -O3 -frtti -DNDK_BUILD=1 -DENABLE_FP16=1 -DENABLE_OPENCL=1
LOCAL_CXXFLAGS += -std=c++17 -frtti -fexceptions
LOCAL_LDLIBS := -llog -landroid -fopenmp -static-openmp

LOCAL_SRC_FILES := \
../unittest/unittest_opencl_kernels_qk_k.cpp

LOCAL_C_INCLUDES += $(NNTRAINER_INCLUDES)

4 changes: 3 additions & 1 deletion test/unittest/meson.build
@@ -80,7 +80,9 @@ if host_machine.system() != 'windows'
endif

if get_option('enable-opencl')
test_target += [['unittest_blas_kernels_cl', []]]
test_target += [['unittest_opencl_kernels_blas', []]]
test_target += [['unittest_opencl_kernels_int4', []]]
test_target += [['unittest_opencl_kernels_qk_k', []]]
test_target += [['unittest_attention_kernels_cl', []]]
endif
