Skip to content
Snippets Groups Projects
Commit 86a3d694 authored by Sebastian Gomez-Gonzalez's avatar Sebastian Gomez-Gonzalez
Browse files

Adding support for the compilation with cuda (Not tested yet)

parent e3b7acab
No related branches found
No related tags found
No related merge requests found
......@@ -2,11 +2,19 @@ cmake_minimum_required(VERSION 3.5)
project(ball_tracking)
find_package(OpenCV REQUIRED)
find_package(CUDA 8.0 REQUIRED)
option (PYLIB "Create a Python Module with interface to some of the C++ implementations" ON)
include_directories(include
${OpenCV_INCLUDES})
${OpenCV_INCLUDES}
${CUDA_INCLUDE_DIRS})
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11 -arch=sm_30" )
cuda_add_library(cu_ball_track
src/cuda/img_proc.cu
)
add_library(ball_tracking SHARED
src/img_proc.cpp
src/utils.cpp
......@@ -14,6 +22,7 @@ add_library(ball_tracking SHARED
)
target_link_libraries(ball_tracking
${OpenCV_LIBS}
cu_ball_track
)
#Compile with C++11 support only
......
......@@ -4,7 +4,6 @@
#include <opencv2/opencv.hpp>
#include <memory>
#include <functional>
#include <json.hpp>
namespace ball_tracking {
......@@ -35,6 +34,22 @@ namespace ball_tracking {
*/
cv::Mat quadf_log_reg(cv::InputArray src, cv::InputArray bkg, cv::InputArray weights);
namespace cuda {
/**
* @brief Applies a pixel-wise logistic regression with quadratic features to the source
* image and returns a single-channel image with the log-probabilities in the GPU
*
* NOTE(review): the CUDA implementation visible in this change stores the raw weighted
* sum of the quadratic feature products for every pixel (no sigmoid or log is applied
* there) — confirm that this is what callers expect as "log-probabilities".
*
* @param[in] src The source image on the GPU. The kernel reads it as 3 channels of
* 8 bits per pixel.
* @param[in] bkg A background image (Without the ball) on the GPU. Must have the same
* number of rows and columns as src; read as 3 channels of 8 bits per pixel.
* @param[in] weights Vector of weights of logistic regression. The kernel reads 28
* double values from the first row.
* @param[out] dst The destination image in the GPU. It is (re)created with the size of
* src as a single-channel image of doubles.
* @param[in] stream The stream of execution in the GPU
*/
void quadf_log_reg(const cv::cuda::GpuMat& src, const cv::cuda::GpuMat& bkg,
const cv::cuda::GpuMat weights, cv::cuda::GpuMat& dst, cv::cuda::Stream& stream);
};
};
#endif
#include "opencv2/cudev.hpp"
#include "opencv2/opencv_modules.hpp"
#include <ball_tracking/img_proc.hpp>
using namespace cv;
using namespace cv::cuda;
namespace ball_tracking {
namespace {
/**
 * @brief Computes, for every pixel, the weighted sum of all pairwise products of the
 * 7 features (src.x, src.y, src.z, bkg.x, bkg.y, bkg.z, 1), with the colour values
 * scaled by 1/255.
 *
 * Launch layout: 2D grid of 2D blocks; thread (x, y) handles column x, row y. Any
 * block shape works because the weights are loaded with a block-stride loop.
 * Shared memory: 28 doubles (static).
 *
 * @param src     Source image, 3 x 8-bit channels per pixel.
 * @param bkg     Background image, same size and layout as src.
 * @param weights 28 doubles stored in row 0, ordered as the upper triangle of the
 *                7x7 feature-product matrix traversed row by row (i <= j).
 * @param dst     Output image of doubles, same size as src.
 */
__global__ void quadf_log_reg_kernel(const PtrStepSz<uchar3> src,
        const PtrStepSz<uchar3> bkg, const PtrStepSz<double> weights,
        PtrStepSz<double> dst) {
    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; // column index
    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; // row index

    // Every block needs its own copy of the weights in shared memory, so the load
    // must be indexed by the thread's position inside the block, not by its global
    // position (otherwise only the first block would initialise the cache).
    __shared__ double w[28]; // 7*(7+1)/2 upper-triangular coefficients
    const unsigned int tid = threadIdx.y*blockDim.x + threadIdx.x;
    const unsigned int block_threads = blockDim.x*blockDim.y;
    for (unsigned int k = tid; k < 28; k += block_threads) {
        w[k] = weights(0, k);
    }
    // Reached by all threads of the block, including those outside the image.
    __syncthreads();

    // x runs over columns and y over rows; PtrStep::operator() takes (row, col).
    if (x < src.cols && y < src.rows) {
        const uchar3 it = src(y, x), it_bkg = bkg(y, x);
        const double lfeat[7] = {it.x/255.0, it.y/255.0, it.z/255.0,
            it_bkg.x/255.0, it_bkg.y/255.0, it_bkg.z/255.0, 1.0};
        double sum = 0.0;
        const double* wptr = w;
        for (unsigned int i=0; i<7; i++) {
            for (unsigned int j=i; j<7; j++, wptr++) {
                sum += (*wptr) * lfeat[i] * lfeat[j];
            }
        }
        dst(y, x) = sum;
    }
}
};
namespace cuda {
/**
* Require that src and bkg are of equal dimensions and that weights
* is a vector of 28 dimensions
*/
void quadf_log_reg(const GpuMat& src, const GpuMat& bkg,
const GpuMat weights, GpuMat& dst, Stream& stream = Stream::Null()) {
CV_Assert(src.rows==bkg.rows && src.cols==bkg.cols);
dim3 block(32,8);
dim3 grid((src.cols + block.x - 1)/block.x,
(src.rows + block.y - 1) / block.y);
dst.create(src.size(), CV_64FC1);
cudaStream_t s = StreamAccessor::getStream(stream);
quadf_log_reg_kernel<<<grid,block,0,s>>>(src, bkg, weights, dst);
if (s == 0)
cudaDeviceSynchronize();
}
};
};
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment