I made a Windows Forms project, and in Visual Studio I added a second project containing CUDA code. The idea is that the image from the form is passed to CUDA, processed there, and returned. But then the difficulties began: the result of the function should, in theory, be written to a variable of the Image class, and the picture is then drawn on the form from that variable, but Visual Studio reports an attempt to access protected memory. I have no idea how to get around this; I have thought about it and searched for quite a long time. I don't know whether I need to fix Visual Studio, fix the code, or whether the problem is in my PC. Later, I think I'll rewrite the code so that only a number, not the image, is passed back from CUDA, but I'm already afraid that it won't work either. The code is attached below.
The first piece is from the form itself: I get a function from the library, then I call it and try to use the result; the error is thrown on the last line. If important information is missing, I'll try to add it.
// Load the CUDA helper DLL at run time and resolve its "calculate" entry point.
HINSTANCE hGetProcIDDLL = LoadLibrary(L"CUDAWin.dll");
if (!hGetProcIDDLL)
{
    throw gcnew Exception("Could not load CUDAWin.dll");
}
// The pointer type has to match the DLL's definition, which is
// `void* calculate(void* image)`: it returns the img pointer it was given,
// or NULL on failure. Calling it through a pointer with a different return
// type or calling convention is undefined behaviour.
// NOTE(review): __cdecl assumes the DLL is built with the compiler's default
// calling convention - confirm against the DLL project's settings.
typedef void* (__cdecl* function)(void*);
function calc = (function)GetProcAddress(hGetProcIDDLL, "calculate");
if (!calc)
{
    throw gcnew Exception("Could not find 'calculate' in CUDAWin.dll");
}
// Image descriptor that is handed to the DLL, and a managed copy of the
// picture currently shown in pct1.
struct img* image = new img();
Bitmap^ bitmap = gcnew Bitmap(pct1->Image);
The second piece is the receiving function on the CUDA side, which returns the result.
CUDAWin.cpp
#include<windows.h>
#include "Kernel.cuh"
// Image descriptor handed to calculate(); its three fields are forwarded
// to calc() as (image, width, height).
struct img
{
    int width;            // image width, as passed to calc()
    int height;           // image height, as passed to calc()
    unsigned int* image;  // pixel buffer, as passed to calc()
};
// Runs calc() on the image described by `image`.
//
// image - pointer to an img; its image/width/height fields are forwarded
//         to calc().
// Returns the same pointer on success. Returns NULL when `image` or its
// pixel buffer is null, or when calc() reports failure (-1).
//
// NOTE(review): the host application resolves this function with
// GetProcAddress(..., "calculate"), which needs an undecorated export
// (extern "C" + __declspec(dllexport), or a .def file) - confirm that the
// project provides one.
void* calculate(void* image)
{
    // Reject a missing descriptor before dereferencing it.
    if (image == NULL)
        return NULL;

    struct img* im = (img*)image;

    // A descriptor without a pixel buffer cannot be processed.
    if (im->image == NULL)
        return NULL;

    if (calc(im->image, im->width, im->height) == -1)
        return NULL;
    return im;
}
// DLL entry point. This library needs no setup or teardown for any
// notification (process/thread attach or detach), so every call simply
// reports success.
BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
    // None of the arguments influence the result.
    (void)hModule;
    (void)ul_reason_for_call;
    (void)lpReserved;

    return TRUE;
}
Kernel.cu
// Converts a packed 32-bit image to grayscale in place.
//
// Launch layout: 2D grid of 2D blocks. Each thread handles the pixel at
// (blockIdx * blockDim + threadIdx) in x and y; threads that fall outside
// width x height do nothing, so the grid may overshoot the image.
// No shared memory is used.
//
// src    - device pointer to width * height pixels, indexed as y * width + x
// width  - number of pixels per row
// height - number of rows
//
// For every pixel the three low bytes (bits 16-23, 8-15 and 0-7) are
// averaged, the average is written back into all three of them, and the
// top byte is forced to 0xFF.
__global__ void kernel(unsigned* src, int width,
    int height) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if ((x < width) && (y < height))
    {
        // Widen before multiplying so large images cannot overflow int.
        const size_t idx = (size_t)y * (size_t)width + (size_t)x;
        const unsigned pixel = src[idx];

        // Shift first, then mask. `>>` binds tighter than `&`, so the
        // unparenthesised form `pixel & 0x00FF0000 >> 0x10` is really
        // `pixel & 0xFF` and reads the wrong channel.
        const unsigned c2 = (pixel >> 0x10) & 0xFF;
        const unsigned c1 = (pixel >> 0x8) & 0xFF;
        const unsigned c0 = pixel & 0xFF;
        const unsigned gray = (c2 + c1 + c0) / 3;

        src[idx] = 0xFF000000 | (gray << 0x10) |
            (gray << 0x8) | gray;
    }
}
// Runs the grayscale kernel over a host image, in place.
//
// srcImage - host pointer to width * height pixels; overwritten with the
//            processed image on success
// width    - number of pixels per row, must be > 0
// height   - number of rows, must be > 0
//
// Returns 0 on success and -1 on failure (invalid arguments, allocation
// failure, copy failure or kernel launch failure). The device buffer is
// released on every path once it has been allocated.
int calc(unsigned* srcImage, int width, int height)
{
    // Nothing sensible can be done with a missing buffer or an empty or
    // negative extent; a negative value would also wrap to a huge size_t.
    if (!srcImage || width <= 0 || height <= 0)
        return -1;

    unsigned* dev_srcImage = 0;
    const size_t size = sizeof(unsigned) * (size_t)width * (size_t)height;
    if (cudaMalloc((void**)&dev_srcImage, size) != cudaSuccess)
        return -1;

    int status = -1;
    if (cudaMemcpy(dev_srcImage, srcImage, size, cudaMemcpyHostToDevice) == cudaSuccess)
    {
        // The maximum number of threads per block may differ between
        // devices; 16 x 16 = 256 stays within the usual 1024-thread limit
        // (a 128 x 128 block would not launch).
        const dim3 threads(16, 16);
        // Ceil-divide so the grid covers the whole image; the kernel
        // bounds-checks the overshoot.
        const dim3 blocks((width + threads.x - 1) / threads.x,
                          (height + threads.y - 1) / threads.y);

        // Execution configuration order is <<<grid, block>>>.
        kernel<<<blocks, threads>>>(dev_srcImage, width, height);

        // cudaGetLastError() reports launch-configuration errors. The
        // blocking device-to-host copy then waits for the kernel and
        // surfaces any execution error.
        if (cudaGetLastError() == cudaSuccess &&
            cudaMemcpy(srcImage, dev_srcImage, size, cudaMemcpyDeviceToHost) == cudaSuccess)
        {
            status = 0;
        }
    }

    cudaFree(dev_srcImage);
    return status;
}