Skip to content

Commit b14a7fa

Browse files
committed
注释语句
1 parent 5ca60ff commit b14a7fa

17 files changed

+1276
-0
lines changed

CMakeLists.txt

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
# Build script for the sysDetectSpeed shared library (TensorRT + Caffe + Qt4
# detection/speed-estimation project targeting NVIDIA Jetson, aarch64).
cmake_minimum_required(VERSION 2.8)
project(sysDetectSpeed)

# setup compiler flags — TensorRT samples require C++11
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # -std=gnu++11
set(BUILD_DEPS "YES" CACHE BOOL "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed.")

# if this is the first time running cmake, perform pre-build dependency install
# script (or if the user manually triggers re-building the dependencies)
#if( ${BUILD_DEPS} )
#    message("Launching pre-build dependency installer script...")
#
#    execute_process(COMMAND sh ../CMakePreBuild.sh
#                    WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
#                    RESULT_VARIABLE PREBUILD_SCRIPT_RESULT)
#
#    set(BUILD_DEPS "NO" CACHE BOOL "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed." FORCE)
#    message("Finished installing dependencies")
#endif()

# Qt is used to load images (installed by ubuntu-desktop)
find_package(Qt4 REQUIRED)
include(${QT_USE_FILE})
add_definitions(${QT_DEFINITIONS})

# setup CUDA
# NOTE(review): made CUDA REQUIRED — cuda_add_library() below cannot work
# without it, so failing at configure time gives a much clearer error.
find_package(CUDA REQUIRED)

set(
    CUDA_NVCC_FLAGS
    ${CUDA_NVCC_FLAGS};
    -O3
    # SM 5.3 / SM 6.2 — presumably Jetson TX1 / TX2 targets; confirm hardware
    -gencode arch=compute_53,code=sm_53
    -gencode arch=compute_62,code=sm_62
)

# setup project output paths
set(PROJECT_OUTPUT_DIR ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR})
set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include)

file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR})
file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)

message("-- system arch: ${CMAKE_SYSTEM_PROCESSOR}")
message("-- output path: ${PROJECT_OUTPUT_DIR}")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)

# build C/C++ interface
# NOTE(review): directory-scoped include_directories() and the hardcoded
# /home/nvidia and /usr/lib/aarch64-linux-gnu paths tie this build to one
# machine image; target_include_directories() + find_package() imported
# targets would be preferable, kept as-is to preserve the working setup.
include_directories(${PROJECT_INCLUDE_DIR} ${GIE_PATH}/include)
include_directories(/usr/include/gstreamer-1.0 /usr/lib/aarch64-linux-gnu/gstreamer-1.0/include /usr/include/glib-2.0 /usr/include/libxml2 /usr/lib/aarch64-linux-gnu/glib-2.0/include/ /usr/local/cuda-9.0/extras/CUPTI/include)
include_directories(${PROJECT_INCLUDE_DIR}
    /home/nvidia/caffe/include
    /home/nvidia/caffe/build/include
    /usr/local/include
    /usr/local/cuda/include
    /usr/include
)

# NOTE(review): file(GLOB) misses newly added sources until CMake is re-run;
# an explicit source list is generally preferred.
file(GLOB inferenceSources *.cpp *.cu)
file(GLOB inferenceIncludes *.h)

cuda_add_library(sysDetectSpeed SHARED ${inferenceSources})
target_link_libraries(sysDetectSpeed nvcaffe_parser nvinfer Qt4::QtGui GL gstreamer-1.0 gstapp-1.0) #GLEW # gstreamer-0.10 gstbase-0.10 gstapp-0.10
target_link_libraries(sysDetectSpeed
    /home/nvidia/caffe/build/lib/libcaffe.so
    /usr/lib/aarch64-linux-gnu/libglog.so
    /usr/lib/aarch64-linux-gnu/libgflags.so.2
    /usr/lib/aarch64-linux-gnu/libboost_system.so
    /usr/lib/aarch64-linux-gnu/libGLEW.so.1.13
)

# transfer all headers to the include directory
foreach(include ${inferenceIncludes})
    message("-- Copying ${include}")
    configure_file(${include} ${PROJECT_INCLUDE_DIR} COPYONLY)
endforeach()

# create symbolic link for network data
execute_process(COMMAND "${CMAKE_COMMAND}" "-E" "create_symlink" "${PROJECT_SOURCE_DIR}/data/networks" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/networks")

# copy image data into the runtime output directory
file(GLOB imageData ${PROJECT_SOURCE_DIR}/data/images/*)

foreach(image ${imageData})
    message("-- Copying ${image}")
    file(COPY ${image} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
endforeach()

add_subdirectory(detectnet)

# setup OpenCV
find_package(OpenCV REQUIRED)

target_link_libraries(sysDetectSpeed ${OpenCV_LIBS})

# install the public headers
foreach(include ${inferenceIncludes})
    install(FILES "${include}" DESTINATION include/sysDetectSpeed)
endforeach()

# install the shared library
install(TARGETS sysDetectSpeed DESTINATION lib/sysDetectSpeed EXPORT sysDetectSpeedConfig)

# install the cmake project, for importing
install(EXPORT sysDetectSpeedConfig DESTINATION share/sysDetectSpeed/cmake)

CMakePreBuild.sh

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
# this script is automatically run from CMakeLists.txt
#
# Installs the Qt4/GLEW/GStreamer build dependencies with apt, repairs the
# Tegra libGL symlink, and switches the Jetson into max-performance mode.
# Must be run with sudo privileges available.

BUILD_ROOT=$PWD
TORCH_PREFIX=$PWD/torch   # NOTE(review): unused below — confirm whether Torch setup was removed intentionally

echo "[Pre-build] dependency installer script running..."
echo "[Pre-build] build root directory: $BUILD_ROOT"


# break on errors
# NOTE(review): set -e is disabled, so a failed apt-get continues silently.
#set -e


# install packages
sudo apt-get update
sudo apt-get install -y libqt4-dev qt4-dev-tools libglew-dev glew-utils libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libglib2.0-dev
sudo apt-get update

# replace the Mesa libGL with the Tegra driver's libGL
sudo rm /usr/lib/aarch64-linux-gnu/libGL.so
sudo ln -s /usr/lib/aarch64-linux-gnu/tegra/libGL.so /usr/lib/aarch64-linux-gnu/libGL.so

# maximize performance (nvpmodel -m 0 = max power mode; then lock clocks)
sudo nvpmodel -m 0
sudo ~/jetson_clock.sh
echo "[Pre-build] Finished CMakePreBuild script"

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# caffe-yolov3

activation_kernels.cu

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
/*
 * Company: Systhesis
 * Author: Chen
 * Date: 2018/06/04
 */
#include "activations.h"
#include "cuda.h"
#include "blas.h"


/* "Leaky hard tanh": identity on [0,1], slope .001 outside that interval. */
__device__ float lhtan_activate_kernel(float x)
{
    if(x > 1) return .001f*(x-1.f) + 1.f;
    if(x < 0) return .001f*x;
    return x;
}
18+
19+
/* Hard tanh: clamp x to [-1, 1]. */
__device__ float hardtan_activate_kernel(float x)
{
    return (x < -1) ? -1
         : (x > 1)  ? 1
         : x;
}
25+
26+
/* Single-expression activations. Expressions are kept token-for-token from the
 * reference implementation so float/NaN edge-case behavior is unchanged. */
__device__ float linear_activate_kernel(float x)   { return x; }
/* sigmoid */
__device__ float logistic_activate_kernel(float x) { return 1.f/(1.f + expf(-x)); }
/* sigmoid rescaled to (-1, 1) */
__device__ float loggy_activate_kernel(float x)    { return 2.f/(1.f + expf(-x)) - 1; }
__device__ float relu_activate_kernel(float x)     { return x*(x>0); }
__device__ float elu_activate_kernel(float x)      { return (x >= 0)*x + (x < 0)*(expf(x)-1); }
/* leaky variant with .01 negative slope */
__device__ float relie_activate_kernel(float x)    { return (x>0) ? x : .01f*x; }
__device__ float ramp_activate_kernel(float x)     { return x*(x>0)+.1f*x; }
/* leaky ReLU with .1 negative slope */
__device__ float leaky_activate_kernel(float x)    { return (x>0) ? x : .1f*x; }
__device__ float tanh_activate_kernel(float x)     { return (2.f/(1 + expf(-2*x)) - 1); }
35+
/* Piecewise-linear sigmoid approximation: slope .125 on [-4, 4],
 * slope .01 in the tails. */
__device__ float plse_activate_kernel(float x)
{
    return (x < -4) ? .01f * (x + 4)
         : (x > 4)  ? .01f * (x - 4) + 1
         : .125f*x + .5f;
}
41+
/* Staircase activation: alternates flat steps with unit-slope segments.
 * NOTE(review): for negative x, n%2 follows C's truncated remainder (may be
 * negative/zero) — behavior kept exactly as the reference implementation. */
__device__ float stair_activate_kernel(float x)
{
    int n = floorf(x);
    return (n%2 == 0) ? floorf(x/2)
                      : (x - n) + floorf(x/2);
}
47+
48+
/* Dispatch x through the activation selected by `a`.
 * Falls through to 0 only if `a` is outside the ACTIVATION enum. */
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:   return linear_activate_kernel(x);
        case LOGISTIC: return logistic_activate_kernel(x);
        case LOGGY:    return loggy_activate_kernel(x);
        case RELU:     return relu_activate_kernel(x);
        case ELU:      return elu_activate_kernel(x);
        case RELIE:    return relie_activate_kernel(x);
        case RAMP:     return ramp_activate_kernel(x);
        case LEAKY:    return leaky_activate_kernel(x);
        case TANH:     return tanh_activate_kernel(x);
        case PLSE:     return plse_activate_kernel(x);
        case STAIR:    return stair_activate_kernel(x);
        case HARDTAN:  return hardtan_activate_kernel(x);
        case LHTAN:    return lhtan_activate_kernel(x);
    }
    return 0;
}
80+
81+
/* One thread per element: apply activation `a` to x[0..n) in place. */
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
{
    /* flatten the (possibly 2-D) grid into a linear element index */
    int idx = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(idx < n)
        x[idx] = activate_kernel(x[idx], a);
}
86+
87+
void activate_array_gpu(float *x, int n, ACTIVATION a)
88+
{
89+
activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
90+
check_error(cudaPeekAtLastError());
91+
}

activations.h

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
/*
 * Company: Systhesis
 * Author: Chen
 * Date: 2018/06/04
 */

/* Activation function identifiers and the GPU launcher that applies one of
 * them element-wise to a device array (see activation_kernels.cu). */

/* NOTE(review): guard renamed from __ACTIVATIONS_H_ — identifiers beginning
 * with a double underscore are reserved for the implementation in C/C++. */
#ifndef ACTIVATIONS_H_
#define ACTIVATIONS_H_

/* Supported activations; enumerator order is part of the ABI — do not reorder. */
typedef enum{
    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
} ACTIVATION;

/* Apply activation `a` in place to the n-element device array x. */
void activate_array_gpu(float* x,int n,ACTIVATION a);

#endif

blas.h

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
/*
 * Company: Systhesis
 * Author: Chen
 * Date: 2018/06/04
 */

/* GPU BLAS-style helpers (implemented in blas_kernels.cu). */

/* NOTE(review): guard renamed from __BLAS_H_ — identifiers beginning with a
 * double underscore are reserved for the implementation in C/C++. */
#ifndef BLAS_H_
#define BLAS_H_

/* Copy N elements from device array X (stride INCX) to Y (stride INCY). */
void copy_gpu(int N,float* X,int INCX,float* Y,int INCY);

/* Set N elements of device array X (stride INCX) to ALPHA. */
void fill_gpu(int N, float ALPHA, float * X, int INCX);

#endif

blas_kernels.cu

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
/*
 * Company: Systhesis
 * Author: Chen
 * Date: 2018/06/04
 */

#include <assert.h>

#include "cuda.h"
#include "blas.h"

/* Strided copy: Y[i*INCY + OFFY] = X[i*INCX + OFFX] for each i in [0, N). */
__global__ void copy_kernel(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
{
    /* linear element index from the (possibly 2-D) launch grid */
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N)
        Y[i*INCY + OFFY] = X[i*INCX + OFFX];
}
18+
19+
/* Set every INCX-strided element of X, up to N elements, to ALPHA. */
__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
{
    /* linear element index from the (possibly 2-D) launch grid */
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N)
        X[i*INCX] = ALPHA;
}
24+
25+
void copy_gpu_offset(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
26+
{
27+
copy_kernel<<<cuda_gridsize(N),BLOCK>>>(N,X,OFFX,INCX,Y,OFFY,INCY);
28+
check_error(cudaPeekAtLastError());
29+
}
30+
31+
void copy_gpu(int N,float* X,int INCX,float* Y,int INCY)
32+
{
33+
copy_gpu_offset(N,X,0,INCX,Y,0,INCY);
34+
}
35+
36+
37+
void fill_gpu(int N, float ALPHA, float * X, int INCX)
38+
{
39+
fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
40+
check_error(cudaPeekAtLastError());
41+
}

0 commit comments

Comments
 (0)