建立名稱為 OpenCvExample 的 CUDA 專案, CUDA精靈會自動建立一個 kernel.cu 檔 :
在專案中增加 main.cpp 與 OpenCvExample.h檔:
專案預設編譯為 x64 版本, 因此在設定專案的 VC++ include 與 lib 目錄屬性時, 將 include 目錄設定為 $(OPENCV_DIR)\include, lib 目錄設定為 $(OPENCV_DIR)\x64\vc15\lib :
設定 VC 的連結程式庫名稱, 加入 opencv_world343d.lib (Debug版) 或 opencv_world343.lib (Release版):
本例只有一個很簡單的複製功能, 主要是展示如何在 kernel 存取 Mat 物件的影像資料 data, 這樣便能利用 OpenCV 的檔案讀寫功能來進行檔案處理, 配合改變 kernel 函數即可做出不同的影像處理結果. 專案程式的函數說明:
函數
|
說明
|
main()
|
1. 建立 srcMat 並讀取影像檔
2. 建立 destMat 準備放置處理結果
3. 呼叫在 kernel.cu的處理函數 CopyImage()
4. 顯示處理結果
|
CopyImage()
|
1. 準備 CUDA 的 device memory
2. 呼叫 kernel 函數 copy()
3. 複製處理結果至輸出 dest
|
__global__ void copy()
|
平行處理複製來源影像 src至輸出影像 dest
|
3個程式檔案的內容如下 :
OpenCvExample.h :
#pragma once
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include "opencv2/imgproc.hpp"
#include <opencv2/core/cuda.hpp>
#include <Windows.h>
using namespace cv;
using namespace cuda;
// Device kernel (defined in kernel.cu): each thread computes a global 1-D
// index from blockIdx.x, blockDim.x and threadIdx.x and copies `channels`
// consecutive bytes from src to dest starting at index * channels.
// The kernel performs no bounds check.
__global__ void copy(byte *dest, byte *src, int channels);
// Host wrapper (defined in kernel.cu): uploads the pixel data of *src to the
// GPU, launches copy(), and downloads the result into dest->data.
// Errors are reported on stderr; nothing is returned.
void CopyImage(Mat *dest, Mat *src);
|
kernel.cu :
#include "OpenCvExample.h"
#define WindowRadius 1
// Copies one pixel per thread from src to dest.
//
// Parameters:
//   dest     - output buffer, dereferenced in device code.
//   src      - input buffer, dereferenced in device code.
//   channels - number of consecutive bytes each thread copies (one pixel).
//
// Launch layout: 1-D grid of 1-D blocks. The thread with global index
//   i = blockIdx.x * blockDim.x + threadIdx.x
// copies bytes [i * channels, (i + 1) * channels).
//
// Precondition: the kernel receives no pixel count and therefore cannot
// bounds-check, so gridDim.x * blockDim.x must not exceed the number of
// pixels held by either buffer.
__global__ void copy(byte *dest, byte *src, int channels)
{
    // Index in size_t: the 32-bit product wrapped (and went negative once
    // stored in an int) for buffers larger than 2 GiB.
    const size_t pixel = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    const size_t base = pixel * (size_t)channels;
    for (int c = 0; c < channels; c++)
    {
        dest[base + c] = src[base + c];
    }
}
// Returns the largest value in [1, limit] that divides count exactly.
static unsigned int LargestDivisorUpTo(size_t count, unsigned int limit)
{
    for (unsigned int candidate = limit; candidate > 1; --candidate)
    {
        if (count % candidate == 0)
        {
            return candidate;
        }
    }
    return 1;
}

// Copies the pixel data of *src into *dest by uploading it to GPU 0, running
// the copy() kernel, and downloading the result.
//
// Parameters:
//   dest - pre-allocated output image; must be continuous and hold exactly as
//          many bytes as *src.
//   src  - non-empty, continuous input image.
//
// Failures are reported on stderr and the function returns without touching
// *dest. Device buffers are released on every path. The device is NOT reset
// here, so CUDA resources owned by the caller stay valid.
void CopyImage(Mat *dest, Mat *src)
{
    if (dest == 0 || src == 0 || src->empty() || dest->empty())
    {
        fprintf(stderr, "CopyImage: source or destination image is empty!\n");
        return;
    }
    // A flat byte copy of Mat::data is only valid when rows are not padded.
    if (!src->isContinuous() || !dest->isContinuous())
    {
        fprintf(stderr, "CopyImage: images must be stored continuously!\n");
        return;
    }

    const size_t pixelCount = src->total();
    const size_t bytesPerPixel = src->elemSize();
    const size_t pixelBytes = pixelCount * bytesPerPixel;
    // Guard the device-to-host copy against overrunning dest->data.
    if (dest->total() * dest->elemSize() != pixelBytes)
    {
        fprintf(stderr, "CopyImage: destination size does not match source!\n");
        return;
    }

    byte *dev_src = 0;
    byte *dev_dest = 0;
    cudaError_t cudaStatus;

    // Single-pass loop: `break` jumps to the shared cleanup below.
    do
    {
        // Choose which GPU to run on
        cudaStatus = cudaSetDevice(0);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaSetDevice failed!");
            break;
        }

        // Query the launch limits instead of assuming the image width fits
        // in one block.
        int maxThreadsPerBlock = 0;
        int maxGridDimX = 0;
        cudaStatus = cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0);
        if (cudaStatus == cudaSuccess) {
            cudaStatus = cudaDeviceGetAttribute(&maxGridDimX, cudaDevAttrMaxGridDimX, 0);
        }
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaDeviceGetAttribute failed!");
            break;
        }

        // copy() has no bounds check, so the launch must create exactly one
        // thread per pixel: pick a block size that divides the pixel count.
        const unsigned int threadsPerBlock =
            LargestDivisorUpTo(pixelCount, (unsigned int)maxThreadsPerBlock);
        const size_t blockCount = pixelCount / threadsPerBlock;
        if (blockCount > (size_t)maxGridDimX) {
            fprintf(stderr, "CopyImage: image needs more blocks than the device allows!\n");
            break;
        }

        // Allocate GPU buffers
        cudaStatus = cudaMalloc((void**)&dev_src, pixelBytes);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaMalloc failed!");
            break;
        }
        cudaStatus = cudaMalloc((void**)&dev_dest, pixelBytes);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaMalloc failed!");
            break;
        }

        // Copy from host memory to GPU buffers.
        cudaStatus = cudaMemcpy(dev_src, src->data, pixelBytes, cudaMemcpyHostToDevice);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaMemcpy failed!");
            break;
        }

        // Pass bytes per pixel (elemSize), not channels(): the two differ for
        // depths other than 8 bits, and the buffers are sized by elemSize.
        copy<<<(unsigned int)blockCount, threadsPerBlock>>>(dev_dest, dev_src, (int)bytesPerPixel);

        // Check for any errors launching the kernel
        cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "copy launch failed: %s\n", cudaGetErrorString(cudaStatus));
            break;
        }

        // cudaDeviceSynchronize waits for the kernel to finish
        cudaStatus = cudaDeviceSynchronize();
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching copy!\n", cudaStatus);
            break;
        }

        // Copy output from GPU buffer to host memory.
        cudaStatus = cudaMemcpy(dest->data, dev_dest, pixelBytes, cudaMemcpyDeviceToHost);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaMemcpy failed!");
            break;
        }

        // Optional host-side verification (debug aid):
        //   bool ok = memcmp(dest->data, src->data, pixelBytes) == 0;
        //   fprintf(stderr, "ok=%s\n", ok ? "ok" : "fail");
    } while (false);

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(dev_dest);
    cudaFree(dev_src);
}
|
main.cpp :
#include <stdio.h>
#include <iostream>
#include "OpenCvExample.h"
using namespace std;
int main(int argc, char** argv)
{
cudaError_t
cudaStatus;
char *filename = "D:\\LENA.JPG";
// Load source image
Mat srcMat =
imread(filename, IMREAD_COLOR); // GRAYSCALE); //
Read the file and convert to grayscale
if (srcMat.empty()) // Check for invalid input
{
cout << "Could not open or find the image" << std::endl;
return 1;
}
//create output Mat of the same size as srcMt
Mat destMat;
destMat.create(srcMat.rows,
srcMat.cols, srcMat.type());
// Run the parallel algorithm which should be in the
.cu file
CopyImage(&destMat,
&srcMat);
namedWindow("Source window", WINDOW_AUTOSIZE); // Create a window
for display.
imshow("Source window", srcMat); //
Show our image inside it.
namedWindow("Dest window", WINDOW_AUTOSIZE); // Create a window
for display.
imshow("Dest window", destMat); //
Show our image inside it.
waitKey(0); // Wait for a keystroke in the window
return 0;
}
|
執行結果畫面 :
沒有留言:
張貼留言