2019年1月27日 星期日

[Cuda] 從C# 呼叫Cuda 處理 Bitmap 與 Mat 物件


最近使用C#處理PictureBox, Bitmap 與 cv::Mat 物件, 做個重點記錄, 程式很長, 就不解釋了.



CudaKernel.cu

// Writes srcArray[i] / 2 into destArray[i] for every byte of one pixel.
//
// Each thread handles the `channels` consecutive bytes that start at
// (global thread index * channels). The kernel performs no bounds check, so
// the launch must supply exactly one thread per pixel; the callers in this
// file launch it as <<<height, width>>>.
__global__ void copyKernel(byte *destArray, byte *srcArray, int channels)
{
     // Offset of this thread's first byte in both buffers.
     const int base = (blockIdx.x * blockDim.x + threadIdx.x) * channels;

     int c = 0;
     while (c < channels)
     {
         const int idx = base + c;
         destArray[idx] = srcArray[idx] / 2;
         ++c;
     }
}

………………………………

// Runs copyKernel over a tightly packed host image buffer.
//
// destArray / srcArray : host buffers of width * height * channels bytes each.
// width, height        : image size in pixels; both must be in 1..1024
//                        (width becomes the thread-block size at launch).
// channels             : bytes per pixel, must be > 0.
//
// Returns cudaSuccess when the result has been copied into destArray,
// cudaErrorInvalidValue for rejected arguments, or the first CUDA error hit.
cudaError_t OpArray(byte* destArray, byte* srcArray, int width, int height, int channels)
{
     trace("OpArray()");

     // Reject arguments that would otherwise lead to a null dereference or an
     // invalid launch configuration.
     if (destArray == 0 || srcArray == 0 || width <= 0 || height <= 0 || channels <= 0) {
         trace("Invalid argument");
         return cudaErrorInvalidValue;
     }

     // Nothing is processed in this case, so it must not be reported as success.
     if (width > 1024 || height > 1024) {
         trace("Image Size too Large");
         return cudaErrorInvalidValue;
     }

     // All locals are declared before the first goto so that no jump skips an
     // initialisation.
     byte *dev_srcArray = 0;
     byte *dev_destArray = 0;
     cudaError_t cudaStatus;
     // Multiply in size_t so the byte count cannot overflow int.
     size_t pixelBytes = (size_t)width * (size_t)height * (size_t)channels;

     sprintf(tracebuf, "pixelBytes=%d", (int)pixelBytes);
     trace(tracebuf);

     // Choose which GPU to run on, change this on a multi-GPU system.
     cudaStatus = cudaSetDevice(0);
     if (cudaStatus != cudaSuccess) {
         trace("cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
         goto Cleanup;
     }

     // Allocate the two GPU buffers (one input, one output).
     trace("Allocate GPU buffers");
     cudaStatus = cudaMalloc((void**)&dev_srcArray, pixelBytes);
     if (cudaStatus != cudaSuccess) {
         trace("cudaMalloc dev_src failed!");
         goto Cleanup;
     }

     cudaStatus = cudaMalloc((void**)&dev_destArray, pixelBytes);
     if (cudaStatus != cudaSuccess) {
         trace("cudaMalloc dev_dest failed!");
         goto Cleanup;
     }

     // Copy the input image from host memory to the GPU.
     cudaStatus = cudaMemcpy(dev_srcArray, srcArray, pixelBytes, cudaMemcpyHostToDevice);
     if (cudaStatus != cudaSuccess) {
         trace("cudaMemcpy failed!");
         goto Cleanup;
     }

     // One block per image row, one thread per pixel in the row.
     trace("start kernel");
     copyKernel << <height, width >> > (dev_destArray, dev_srcArray, channels);

     // Check for any errors launching the kernel
     cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         sprintf(tracebuf, "copyKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
         trace(tracebuf);
         goto Cleanup;
     }

     // cudaDeviceSynchronize waits for the kernel to finish, and returns
     // any errors encountered during execution.
     cudaStatus = cudaDeviceSynchronize();
     if (cudaStatus != cudaSuccess) {
         sprintf(tracebuf, "cudaDeviceSynchronize returned error code %d after launching copyKernel!\n", cudaStatus);
         trace(tracebuf);
         goto Cleanup;
     }

     // Copy the result from the GPU back to host memory.
     trace("copy result");
     cudaStatus = cudaMemcpy(destArray, dev_destArray, pixelBytes, cudaMemcpyDeviceToHost);
     if (cudaStatus != cudaSuccess) {
         trace("cudaMemcpy failed!");
         goto Cleanup;
     }

Cleanup:
     // Release the device buffers BEFORE any device reset: a reset destroys
     // the allocations, after which cudaFree would be handed stale pointers.
     cudaFree(dev_srcArray);
     cudaFree(dev_destArray);

     // As before, the device is reset only after a fully successful run.
     if (cudaStatus == cudaSuccess) {
         cudaStatus = cudaDeviceReset();
         if (cudaStatus != cudaSuccess) {
             trace("cudaDeviceReset failed!");
         }
     }

     trace("end");
     return cudaStatus;
}

// Runs copyKernel over the pixel data of an OpenCV Mat.
//
// srcMat  : input image; must be non-empty, continuous, and at most
//           1024 x 1024 (cols becomes the thread-block size at launch).
// destMat : output image; must be continuous and hold at least as many bytes
//           as srcMat. Its data buffer is overwritten with the result.
//
// Returns cudaSuccess when the result has been copied into destMat,
// cudaErrorInvalidValue for rejected arguments, or the first CUDA error hit.
//
// NOTE(review): the byte count uses elemSize() while the kernel handles
// channels() bytes per pixel. These agree for 8-bit Mats only; deeper Mats
// would be only partially processed - confirm whether they must be supported.
cudaError_t OpMat(Mat *destMat, Mat *srcMat)
{
     trace("OpMat()");

     if (destMat == 0 || srcMat == 0 || srcMat->empty()) {
         trace("Invalid argument");
         return cudaErrorInvalidValue;
     }

     // Nothing is processed in this case, so it must not be reported as success.
     if (srcMat->cols > 1024 || srcMat->rows > 1024) {
         trace("Image Size too Large");
         return cudaErrorInvalidValue;
     }

     // The raw memcpy calls below treat both Mats as one packed byte range.
     if (!srcMat->isContinuous() || !destMat->isContinuous()) {
         trace("Mat must be continuous");
         return cudaErrorInvalidValue;
     }

     // All locals are declared before the first goto: jumping over an
     // initialised declaration (as the old 'int channels = ...' did) is
     // ill-formed C++.
     byte *dev_srcMat = 0;
     byte *dev_destMat = 0;
     size_t pixelBytes = srcMat->total() * srcMat->elemSize();
     int channels = srcMat->channels();
     cudaError_t cudaStatus;

     // Guard against writing past the end of the destination buffer.
     if (destMat->total() * destMat->elemSize() < pixelBytes) {
         trace("destMat too small");
         return cudaErrorInvalidValue;
     }

     // Cast so the argument matches the %d conversion.
     sprintf(tracebuf, "pixelBytes=%d", (int)pixelBytes);
     trace(tracebuf);

     // Choose which GPU to run on
     cudaStatus = cudaSetDevice(0);
     if (cudaStatus != cudaSuccess) {
         trace("cudaSetDevice failed!");
         goto Cleanup;
     }

     // Allocate GPU buffers
     cudaStatus = cudaMalloc((void**)&dev_srcMat, pixelBytes);
     if (cudaStatus != cudaSuccess) {
         trace("cudaMalloc failed!");
         goto Cleanup;
     }
     cudaStatus = cudaMalloc((void**)&dev_destMat, pixelBytes);
     if (cudaStatus != cudaSuccess) {
         trace("cudaMalloc failed!");
         goto Cleanup;
     }

     // Copy from host memory to GPU buffers.
     cudaStatus = cudaMemcpy(dev_srcMat, srcMat->data, pixelBytes, cudaMemcpyHostToDevice);
     if (cudaStatus != cudaSuccess) {
         trace("cudaMemcpy failed!");
         goto Cleanup;
     }

     // One block per image row, one thread per pixel in the row.
     copyKernel << <srcMat->rows, srcMat->cols >> > (dev_destMat, dev_srcMat, channels);

     // Check for any errors launching the kernel
     cudaStatus = cudaGetLastError();
     if (cudaStatus != cudaSuccess) {
         sprintf(tracebuf, "copyKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
         trace(tracebuf);
         goto Cleanup;
     }

     // cudaDeviceSynchronize waits for the kernel to finish
     cudaStatus = cudaDeviceSynchronize();
     if (cudaStatus != cudaSuccess) {
         sprintf(tracebuf, "cudaDeviceSynchronize returned error code %d after launching copyKernel!\n", cudaStatus);
         trace(tracebuf);
         goto Cleanup;
     }

     // Copy output from GPU buffer to host memory.
     cudaStatus = cudaMemcpy(destMat->data, dev_destMat, pixelBytes, cudaMemcpyDeviceToHost);
     if (cudaStatus != cudaSuccess) {
         trace("cudaMemcpy failed!");
         goto Cleanup;
     }

Cleanup:
     // Release the device buffers BEFORE any device reset: a reset destroys
     // the allocations, after which cudaFree would be handed stale pointers.
     cudaFree(dev_srcMat);
     cudaFree(dev_destMat);

     // As before, the device is reset only after a fully successful run.
     if (cudaStatus == cudaSuccess) {
         cudaStatus = cudaDeviceReset();
         if (cudaStatus != cudaSuccess) {
             trace("cudaDeviceReset failed!");
         }
     }

     trace("end");
     return cudaStatus;
}



CudaWrapper.cpp

// C-linkage DLL entry point: forwards the raw image buffers to OpArray and
// reports whether it returned cudaSuccess.
extern "C" DLLEXPORT bool testArray(byte* destArray, byte* srcArray, int width, int height, int channels)
{
     trace("testArray()");
     const bool succeeded = (cudaSuccess == OpArray(destArray, srcArray, width, height, channels));
     return succeeded;
}

// C-linkage DLL entry point: forwards the two Mat pointers to OpMat and
// reports whether it returned cudaSuccess.
extern "C" DLLEXPORT bool testMat(Mat *destMat, Mat *srcMat)
{
     trace("testMat()");
     const bool succeeded = (cudaSuccess == OpMat(destMat, srcMat));
     return succeeded;
}


MainForm.cs

        [DllImport("RmtCoreCuda.dll", EntryPoint = "testArray", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
        unsafe private static extern bool testArray(IntPtr dest, IntPtr src, int width, int height, int channels);
        [DllImport("RmtCoreCuda.dll", EntryPoint = "testMat", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)]
        private static extern bool testMat(IntPtr destMat, IntPtr srcMat);

………………………………
       private void testCudaMat()
        {
            //String filename = "D:\\LENA.JPG";
            String filename = "D:\\MARBLES.BMP";
            //String filename = "D:\\LENA.JPG";
            Mat srcMat = CvInvoke.Imread(filename); //, Emgu.CV.CvEnum.ImreadModes.Grayscale); // Read the file and convert to grayscale


            statusMessage.Text = "";
            if (srcMat == null)
            {
                statusMessage.Text = "No Source Picture";
                return;
            }
            if (srcMat.Width > 1024 || srcMat.Height > 1024)
            {
                statusMessage.Text = "Source Picture too large";
                return;
            }

            //Mat destMat = srcMat; // new Mat(srcMat.Size, srcMat.GetType(), srcMat.NumberOfChannels);
            Mat destMat = new Mat(srcMat.Size, DepthType.Cv8U, srcMat.NumberOfChannels);
            testMat(destMat, srcMat);
            CvInvoke.Imshow("destMat", destMat);
        }

        private void testCudaArray()
        {
            statusMessage.Text = "";
            if (picSource==null)
            {
                statusMessage.Text = "No Source Picture";
                return;
            }
            if (picSource.Image.Width>1024 || picSource.Image.Height>1024)
            {
                statusMessage.Text = "Source Picture too large";
                return;
            }

            Bitmap srcImage = (Bitmap)picSource.Image;
            Rectangle size = new Rectangle(0, 0, srcImage.Width, srcImage.Height);
            Bitmap destImage = new Bitmap(srcImage.Width, srcImage.Height, srcImage.PixelFormat);

            BitmapData srcData = srcImage.LockBits(size, ImageLockMode.ReadOnly, srcImage.PixelFormat);
            BitmapData destData = destImage.LockBits(size, ImageLockMode.ReadWrite, srcImage.PixelFormat);

            int channels = 0;
            unsafe
            {
                IntPtr srcPtr = srcData.Scan0;
                IntPtr destPtr = destData.Scan0;
                switch (srcImage.PixelFormat)
                {
                    case PixelFormat.Format8bppIndexed:
                        channels = 1; break;
                    case PixelFormat.Format16bppArgb1555:
                    case PixelFormat.Format16bppGrayScale:
                    case PixelFormat.Format16bppRgb555:
                    case PixelFormat.Format16bppRgb565:
                        channels = 2; break;
                    case PixelFormat.Format24bppRgb:
                        channels = 3; break;
                    case PixelFormat.Format32bppArgb:
                    case PixelFormat.Format32bppPArgb:
                    case PixelFormat.Format32bppRgb:
                        channels = 4; break;

                    default: channels = 0; break;
                }
                testArray(destPtr, srcPtr, srcImage.Width, srcImage.Height, channels);

                int byteLength = srcImage.Width * srcImage.Height * channels;
                byte[] srcByte = new byte[byteLength];
                byte[] destByte = new byte[byteLength];

                Marshal.Copy(srcPtr, srcByte, 0, byteLength);
                Marshal.Copy(destPtr, destByte, 0, byteLength);
                File.WriteAllBytes("d:\\srcData.bin", srcByte);
                File.WriteAllBytes("d:\\destData.bin",destByte);
            }
            //destImage.Palette = srcImage.Palette;
            srcImage.UnlockBits(srcData);
            destImage.UnlockBits(destData);

            picSource.Image = srcImage;

            if (channels == 3)
            {
                //set alpha=255 to show the image
                for (int w = 0; w < destImage.Width; w++)
                {
                    for (int h = 0; h < destImage.Height; h++)
                    {
                        Color c = destImage.GetPixel(w, h);
                        Color newC = Color.FromArgb(255, c);
                        destImage.SetPixel(w, h, newC);
                    }
                }
            }
            picOutput.Image = destImage;
        }

沒有留言:

張貼留言