From 86a3d694eb2876aaaf6048088a9d76f8472f769f Mon Sep 17 00:00:00 2001
From: Sebastian Gomez-Gonzalez <sgomez@tue.mpg.de>
Date: Thu, 14 Sep 2017 16:54:10 +0200
Subject: [PATCH] Adding support for the compilation with cuda (Not tested yet)

---
Review note: src/cuda/img_proc.cu below was revised after this patch was
exported, so the blob id in its "index" line is stale (plain "git apply"
does not need it).

 CMakeLists.txt                     | 11 +++-
 include/ball_tracking/img_proc.hpp | 17 +++++-
 src/cuda/img_proc.cu               | 95 ++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+), 2 deletions(-)
 create mode 100644 src/cuda/img_proc.cu

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 22b96b6..8620c05 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,11 +2,19 @@ cmake_minimum_required(VERSION 3.5)
 project(ball_tracking)
 
 find_package(OpenCV REQUIRED)
+find_package(CUDA 8.0 REQUIRED)
 
 option (PYLIB "Create a Python Module with interface to some of the C++ implementations" ON)
 
 include_directories(include
-  ${OpenCV_INCLUDES})
+  ${OpenCV_INCLUDES}
+  ${CUDA_INCLUDE_DIRS})
+
+set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11 -arch=sm_30" )
+cuda_add_library(cu_ball_track
+  src/cuda/img_proc.cu
+  )
+
 add_library(ball_tracking SHARED
   src/img_proc.cpp
   src/utils.cpp
@@ -14,6 +22,7 @@ add_library(ball_tracking SHARED
   )
 target_link_libraries(ball_tracking
   ${OpenCV_LIBS}
+  cu_ball_track
   )
 
 #Compile with C++11 support only
diff --git a/include/ball_tracking/img_proc.hpp b/include/ball_tracking/img_proc.hpp
index 6b90e12..6da0a7f 100644
--- a/include/ball_tracking/img_proc.hpp
+++ b/include/ball_tracking/img_proc.hpp
@@ -4,7 +4,6 @@
 #include <opencv2/opencv.hpp>
 #include <memory>
 #include <functional>
-#include <json.hpp>
 
 namespace ball_tracking {
 
@@ -35,6 +34,22 @@ namespace ball_tracking {
    */
   cv::Mat quadf_log_reg(cv::InputArray src, cv::InputArray bkg, cv::InputArray weights);
 
+  namespace cuda {
+
+    /**
+     * @brief Applies a pixel-wise logistic regression with quadratic features to the source
+     * image and returns a single-channel image with the log-probabilities in the GPU
+     *
+     * @param[in] src The source image on the GPU
+     * @param[in] bkg A background image (Without the ball) on the GPU
+     * @param[in] weights Vector of weights of logistic regression
+     * @param[out] dst The destination image in the GPU
+     * @param[in] stream The stream of execution in the GPU
+     */
+    void quadf_log_reg(const cv::cuda::GpuMat& src, const cv::cuda::GpuMat& bkg,
+        const cv::cuda::GpuMat weights, cv::cuda::GpuMat& dst, cv::cuda::Stream& stream);
+  };
+
 };
 
 #endif
diff --git a/src/cuda/img_proc.cu b/src/cuda/img_proc.cu
new file mode 100644
index 0000000..eb1a886
--- /dev/null
+++ b/src/cuda/img_proc.cu
@@ -0,0 +1,95 @@
+
+#include "opencv2/cudev.hpp"
+#include "opencv2/opencv_modules.hpp"
+#include <ball_tracking/img_proc.hpp>
+
+using namespace cv;
+using namespace cv::cuda;
+
+namespace ball_tracking {
+
+  namespace {
+    // 7 linear features per pixel (3 source channels, 3 background channels
+    // and a constant 1) and one weight per feature pair (i<=j): 7*8/2 = 28
+    enum { NUM_FEATS = 7, NUM_WEIGHTS = 28 };
+
+    // Raise a cv::Exception if a CUDA runtime call or kernel launch failed
+    inline void check_cuda(cudaError_t err) {
+      if (err != cudaSuccess)
+        CV_Error(cv::Error::GpuApiCallError, cudaGetErrorString(err));
+    }
+
+    /**
+     * One thread per pixel on a 2D launch (x -> column, y -> row). Uses
+     * NUM_WEIGHTS doubles of static shared memory per block. weights is read
+     * as a 1x28 row or a 28x1 column; dst is written at the same (row, col).
+     */
+    __global__ void quadf_log_reg_kernel(const PtrStepSz<uchar3> src,
+        const PtrStepSz<uchar3> bkg, const PtrStepSz<double> weights,
+        PtrStepSz<double> dst) {
+      const int x = blockIdx.x*blockDim.x + threadIdx.x;
+      const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+      // Shared memory is private to each block, so every block loads its own
+      // copy of the weights, indexed by the thread position inside the block
+      __shared__ double w[NUM_WEIGHTS];
+      const unsigned int tid = threadIdx.y*blockDim.x + threadIdx.x;
+      for (unsigned int i=tid; i<NUM_WEIGHTS; i+=blockDim.x*blockDim.y) {
+        w[i] = (weights.rows == 1) ? weights(0,i) : weights(i,0);
+      }
+      __syncthreads();
+
+      // PtrStep is indexed as (row, col) = (y, x)
+      if (x < src.cols && y < src.rows) {
+        double sum = 0.0;
+        const uchar3 it = src(y,x), it_bkg = bkg(y,x);
+        const double lfeat[NUM_FEATS] = {it.x/255.0, it.y/255.0, it.z/255.0,
+          it_bkg.x/255.0, it_bkg.y/255.0, it_bkg.z/255.0, 1.0};
+        const double* wptr = w;
+        for (unsigned int i=0; i<NUM_FEATS; i++) {
+          for (unsigned int j=i; j<NUM_FEATS; j++, wptr++) {
+            sum += (*wptr) * lfeat[i] * lfeat[j];
+          }
+        }
+        dst(y,x) = sum;
+      }
+    }
+  };
+
+  namespace cuda {
+    /**
+     * Require that src and bkg are 8-bit 3-channel images of equal
+     * dimensions and that weights is a vector of 28 doubles (1x28 or 28x1).
+     * dst is (re)allocated as CV_64FC1 with the size of src. CUDA errors are
+     * raised as cv::Exception. When the stream maps to the default CUDA
+     * stream (0) the call waits for the device before returning.
+     */
+    void quadf_log_reg(const GpuMat& src, const GpuMat& bkg,
+        const GpuMat weights, GpuMat& dst, Stream& stream = Stream::Null()) {
+      CV_Assert(src.rows==bkg.rows && src.cols==bkg.cols);
+      if (src.empty()) {
+        // Nothing to compute, and a grid with a zero dimension is invalid
+        dst.release();
+        return;
+      }
+      CV_Assert(src.type()==CV_8UC3 && bkg.type()==CV_8UC3);
+      CV_Assert(weights.type()==CV_64FC1 &&
+          (weights.rows==1 || weights.cols==1) &&
+          weights.rows*weights.cols==NUM_WEIGHTS);
+
+      dim3 block(32,8);
+      dim3 grid((src.cols + block.x - 1)/block.x,
+          (src.rows + block.y - 1) / block.y);
+
+      dst.create(src.size(), CV_64FC1);
+      cudaStream_t s = StreamAccessor::getStream(stream);
+
+      quadf_log_reg_kernel<<<grid,block,0,s>>>(src, bkg, weights, dst);
+      check_cuda(cudaGetLastError());
+
+      if (s == 0)
+        check_cuda(cudaDeviceSynchronize());
+    }
+  };
+
+};
-- 
GitLab