Skip to content

Commit b14a7fa

Browse files
committed
注释语句
1 parent 5ca60ff commit b14a7fa

17 files changed

+1276
-0
lines changed

CMakeLists.txt

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
# Build script for the sysDetectSpeed shared library (TensorRT + Caffe + Qt4
# detection/speed-estimation project targeting NVIDIA Jetson, aarch64).
cmake_minimum_required(VERSION 2.8)
project(sysDetectSpeed)

# setup compiler flags — TensorRT samples require C++11
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") # -std=gnu++11
set(BUILD_DEPS "YES" CACHE BOOL "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed.")

# if this is the first time running cmake, perform pre-build dependency install
# script (or if the user manually triggers re-building the dependencies)
#if( ${BUILD_DEPS} )
#    message("Launching pre-build dependency installer script...")
#
#    execute_process(COMMAND sh ../CMakePreBuild.sh
#                    WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
#                    RESULT_VARIABLE PREBUILD_SCRIPT_RESULT)
#
#    set(BUILD_DEPS "NO" CACHE BOOL "If YES, will install dependencies into sandbox. Automatically reset to NO after dependencies are installed." FORCE)
#    message("Finished installing dependencies")
#endif()

# Qt is used to load images (installed by ubuntu-desktop)
find_package(Qt4 REQUIRED)
include(${QT_USE_FILE})
add_definitions(${QT_DEFINITIONS})

# setup CUDA
# NOTE(review): made CUDA REQUIRED — cuda_add_library() below cannot work
# without it, so failing at configure time gives a much clearer error.
find_package(CUDA REQUIRED)

set(
    CUDA_NVCC_FLAGS
    ${CUDA_NVCC_FLAGS};
    -O3
    # SM 5.3 / SM 6.2 — presumably Jetson TX1 / TX2 targets; confirm hardware
    -gencode arch=compute_53,code=sm_53
    -gencode arch=compute_62,code=sm_62
)

# setup project output paths
set(PROJECT_OUTPUT_DIR ${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR})
set(PROJECT_INCLUDE_DIR ${PROJECT_OUTPUT_DIR}/include)

file(MAKE_DIRECTORY ${PROJECT_INCLUDE_DIR})
file(MAKE_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)

message("-- system arch: ${CMAKE_SYSTEM_PROCESSOR}")
message("-- output path: ${PROJECT_OUTPUT_DIR}")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_OUTPUT_DIR}/lib)

# build C/C++ interface
# NOTE(review): directory-scoped include_directories() and the hardcoded
# /home/nvidia and /usr/lib/aarch64-linux-gnu paths tie this build to one
# machine image; target_include_directories() + find_package() imported
# targets would be preferable, kept as-is to preserve the working setup.
include_directories(${PROJECT_INCLUDE_DIR} ${GIE_PATH}/include)
include_directories(/usr/include/gstreamer-1.0 /usr/lib/aarch64-linux-gnu/gstreamer-1.0/include /usr/include/glib-2.0 /usr/include/libxml2 /usr/lib/aarch64-linux-gnu/glib-2.0/include/ /usr/local/cuda-9.0/extras/CUPTI/include)
include_directories(${PROJECT_INCLUDE_DIR}
    /home/nvidia/caffe/include
    /home/nvidia/caffe/build/include
    /usr/local/include
    /usr/local/cuda/include
    /usr/include
)

# NOTE(review): file(GLOB) misses newly added sources until CMake is re-run;
# an explicit source list is generally preferred.
file(GLOB inferenceSources *.cpp *.cu)
file(GLOB inferenceIncludes *.h)

cuda_add_library(sysDetectSpeed SHARED ${inferenceSources})
target_link_libraries(sysDetectSpeed nvcaffe_parser nvinfer Qt4::QtGui GL gstreamer-1.0 gstapp-1.0) #GLEW # gstreamer-0.10 gstbase-0.10 gstapp-0.10
target_link_libraries(sysDetectSpeed
    /home/nvidia/caffe/build/lib/libcaffe.so
    /usr/lib/aarch64-linux-gnu/libglog.so
    /usr/lib/aarch64-linux-gnu/libgflags.so.2
    /usr/lib/aarch64-linux-gnu/libboost_system.so
    /usr/lib/aarch64-linux-gnu/libGLEW.so.1.13
)

# transfer all headers to the include directory
foreach(include ${inferenceIncludes})
    message("-- Copying ${include}")
    configure_file(${include} ${PROJECT_INCLUDE_DIR} COPYONLY)
endforeach()

# create symbolic link for network data
execute_process(COMMAND "${CMAKE_COMMAND}" "-E" "create_symlink" "${PROJECT_SOURCE_DIR}/data/networks" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/networks")

# copy image data into the runtime output directory
file(GLOB imageData ${PROJECT_SOURCE_DIR}/data/images/*)

foreach(image ${imageData})
    message("-- Copying ${image}")
    file(COPY ${image} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
endforeach()

add_subdirectory(detectnet)

# setup OpenCV
find_package(OpenCV REQUIRED)

target_link_libraries(sysDetectSpeed ${OpenCV_LIBS})

# install the public headers
foreach(include ${inferenceIncludes})
    install(FILES "${include}" DESTINATION include/sysDetectSpeed)
endforeach()

# install the shared library
install(TARGETS sysDetectSpeed DESTINATION lib/sysDetectSpeed EXPORT sysDetectSpeedConfig)

# install the cmake project, for importing
install(EXPORT sysDetectSpeedConfig DESTINATION share/sysDetectSpeed/cmake)

CMakePreBuild.sh

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
# this script is automatically run from CMakeLists.txt
#
# Installs the Qt4/GLEW/GStreamer build dependencies with apt, repairs the
# Tegra libGL symlink, and switches the Jetson into max-performance mode.
# Must be run with sudo privileges available.

BUILD_ROOT=$PWD
TORCH_PREFIX=$PWD/torch   # NOTE(review): unused below — confirm whether Torch setup was removed intentionally

echo "[Pre-build] dependency installer script running..."
echo "[Pre-build] build root directory: $BUILD_ROOT"


# break on errors
# NOTE(review): set -e is disabled, so a failed apt-get continues silently.
#set -e


# install packages
sudo apt-get update
sudo apt-get install -y libqt4-dev qt4-dev-tools libglew-dev glew-utils libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev libglib2.0-dev
sudo apt-get update

# replace the Mesa libGL with the Tegra driver's libGL
sudo rm /usr/lib/aarch64-linux-gnu/libGL.so
sudo ln -s /usr/lib/aarch64-linux-gnu/tegra/libGL.so /usr/lib/aarch64-linux-gnu/libGL.so

# maximize performance (nvpmodel -m 0 = max power mode; then lock clocks)
sudo nvpmodel -m 0
sudo ~/jetson_clock.sh
echo "[Pre-build] Finished CMakePreBuild script"

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# caffe-yolov3

activation_kernels.cu

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
/*
 * Company: Systhesis
 * Author: Chen
 * Date: 2018/06/04
 */
#include "activations.h"
#include "cuda.h"
#include "blas.h"


/* "Leaky hard tanh": identity on [0,1], slope .001 outside that interval. */
__device__ float lhtan_activate_kernel(float x)
{
    if(x > 1) return .001f*(x-1.f) + 1.f;
    if(x < 0) return .001f*x;
    return x;
}
18+
19+
/* Hard tanh: clamp x to [-1, 1]. */
__device__ float hardtan_activate_kernel(float x)
{
    return (x < -1) ? -1
         : (x > 1)  ? 1
         : x;
}
25+
26+
/* Single-expression activations. Expressions are kept token-for-token from the
 * reference implementation so float/NaN edge-case behavior is unchanged. */
__device__ float linear_activate_kernel(float x)   { return x; }
/* sigmoid */
__device__ float logistic_activate_kernel(float x) { return 1.f/(1.f + expf(-x)); }
/* sigmoid rescaled to (-1, 1) */
__device__ float loggy_activate_kernel(float x)    { return 2.f/(1.f + expf(-x)) - 1; }
__device__ float relu_activate_kernel(float x)     { return x*(x>0); }
__device__ float elu_activate_kernel(float x)      { return (x >= 0)*x + (x < 0)*(expf(x)-1); }
/* leaky variant with .01 negative slope */
__device__ float relie_activate_kernel(float x)    { return (x>0) ? x : .01f*x; }
__device__ float ramp_activate_kernel(float x)     { return x*(x>0)+.1f*x; }
/* leaky ReLU with .1 negative slope */
__device__ float leaky_activate_kernel(float x)    { return (x>0) ? x : .1f*x; }
__device__ float tanh_activate_kernel(float x)     { return (2.f/(1 + expf(-2*x)) - 1); }
35+
/* Piecewise-linear sigmoid approximation: slope .125 on [-4, 4],
 * slope .01 in the tails. */
__device__ float plse_activate_kernel(float x)
{
    return (x < -4) ? .01f * (x + 4)
         : (x > 4)  ? .01f * (x - 4) + 1
         : .125f*x + .5f;
}
41+
/* Staircase activation: alternates flat steps with unit-slope segments.
 * NOTE(review): for negative x, n%2 follows C's truncated remainder (may be
 * negative/zero) — behavior kept exactly as the reference implementation. */
__device__ float stair_activate_kernel(float x)
{
    int n = floorf(x);
    return (n%2 == 0) ? floorf(x/2)
                      : (x - n) + floorf(x/2);
}
47+
48+
/* Dispatch x through the activation selected by `a`.
 * Falls through to 0 only if `a` is outside the ACTIVATION enum. */
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:   return linear_activate_kernel(x);
        case LOGISTIC: return logistic_activate_kernel(x);
        case LOGGY:    return loggy_activate_kernel(x);
        case RELU:     return relu_activate_kernel(x);
        case ELU:      return elu_activate_kernel(x);
        case RELIE:    return relie_activate_kernel(x);
        case RAMP:     return ramp_activate_kernel(x);
        case LEAKY:    return leaky_activate_kernel(x);
        case TANH:     return tanh_activate_kernel(x);
        case PLSE:     return plse_activate_kernel(x);
        case STAIR:    return stair_activate_kernel(x);
        case HARDTAN:  return hardtan_activate_kernel(x);
        case LHTAN:    return lhtan_activate_kernel(x);
    }
    return 0;
}
80+
81+
/* One thread per element: apply activation `a` to x[0..n) in place. */
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
{
    /* flatten the (possibly 2-D) grid into a linear element index */
    int idx = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(idx < n)
        x[idx] = activate_kernel(x[idx], a);
}
86+
87+
void activate_array_gpu(float *x, int n, ACTIVATION a)
88+
{
89+
activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
90+
check_error(cudaPeekAtLastError());
91+
}

activations.h

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
/*
 * Company: Systhesis
 * Author: Chen
 * Date: 2018/06/04
 */

/* Activation function identifiers and the GPU launcher that applies one of
 * them element-wise to a device array (see activation_kernels.cu). */

/* NOTE(review): guard renamed from __ACTIVATIONS_H_ — identifiers beginning
 * with a double underscore are reserved for the implementation in C/C++. */
#ifndef ACTIVATIONS_H_
#define ACTIVATIONS_H_

/* Supported activations; enumerator order is part of the ABI — do not reorder. */
typedef enum{
    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
} ACTIVATION;

/* Apply activation `a` in place to the n-element device array x. */
void activate_array_gpu(float* x,int n,ACTIVATION a);

#endif

blas.h

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
/*
 * Company: Systhesis
 * Author: Chen
 * Date: 2018/06/04
 */

/* GPU BLAS-style helpers (implemented in blas_kernels.cu). */

/* NOTE(review): guard renamed from __BLAS_H_ — identifiers beginning with a
 * double underscore are reserved for the implementation in C/C++. */
#ifndef BLAS_H_
#define BLAS_H_

/* Copy N elements from device array X (stride INCX) to Y (stride INCY). */
void copy_gpu(int N,float* X,int INCX,float* Y,int INCY);

/* Set N elements of device array X (stride INCX) to ALPHA. */
void fill_gpu(int N, float ALPHA, float * X, int INCX);

#endif

blas_kernels.cu

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
/*
 * Company: Systhesis
 * Author: Chen
 * Date: 2018/06/04
 */

#include <assert.h>

#include "cuda.h"
#include "blas.h"

/* Strided copy: Y[i*INCY + OFFY] = X[i*INCX + OFFX] for each i in [0, N). */
__global__ void copy_kernel(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
{
    /* linear element index from the (possibly 2-D) launch grid */
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N)
        Y[i*INCY + OFFY] = X[i*INCX + OFFX];
}
18+
19+
/* Set every INCX-strided element of X, up to N elements, to ALPHA. */
__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX)
{
    /* linear element index from the (possibly 2-D) launch grid */
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < N)
        X[i*INCX] = ALPHA;
}
24+
25+
void copy_gpu_offset(int N,float* X,int OFFX,int INCX,float* Y,int OFFY,int INCY)
26+
{
27+
copy_kernel<<<cuda_gridsize(N),BLOCK>>>(N,X,OFFX,INCX,Y,OFFY,INCY);
28+
check_error(cudaPeekAtLastError());
29+
}
30+
31+
void copy_gpu(int N,float* X,int INCX,float* Y,int INCY)
32+
{
33+
copy_gpu_offset(N,X,0,INCX,Y,0,INCY);
34+
}
35+
36+
37+
void fill_gpu(int N, float ALPHA, float * X, int INCX)
38+
{
39+
fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
40+
check_error(cudaPeekAtLastError());
41+
}

0 commit comments

Comments
 (0)