I am trying to copy a class object that contains pointers from the host to the device. In particular, I have a class LikelihoodConstructor which contains an array of pointers to another class, DataModel, which in turn contains an array 'bins' that I'm trying to access. Essentially, the kernel I would like to run is the following:
<code>__global__ void test_kernel(LikelihoodConstructor * gpuClass, int length, int bins){
for (int i =0; i< length;i++){
for (int j = 0;j <bins; j++){
printf("Length %i, bin %i, val %in",i,j,gpuClass->MyModels[i]->nBins);
printf("Length %i, bin %i, val %fn",i,j,gpuClass->MyModels[i]->bins[j]);
<code>__global__ void test_kernel(LikelihoodConstructor * gpuClass, int length, int bins){
for (int i =0; i< length;i++){
for (int j = 0;j <bins; j++){
printf("Length %i, bin %i, val %in",i,j,gpuClass->MyModels[i]->nBins);
//what I want to access
printf("Length %i, bin %i, val %fn",i,j,gpuClass->MyModels[i]->bins[j]);
}
}
printf("Exit Kerneln");
}
</code>
// Debug kernel: prints nBins and every bin value of each DataModel reachable
// from gpuClass.
//
// gpuClass : pointer the kernel dereferences; gpuClass->MyModels,
//            gpuClass->MyModels[i] and gpuClass->MyModels[i]->bins must all be
//            addresses that are valid in device code.
// length   : number of entries of gpuClass->MyModels to visit.
// bins     : number of bin values to print per model (the loop bound for j;
//            it is not read from the model's own nBins member).
//
// The kernel does no thread indexing, so every launched thread prints the
// full table; launch it as <<<1,1>>> to get a single copy of the output.
__global__ void test_kernel(LikelihoodConstructor * gpuClass, int length, int bins){
    for (int i = 0; i < length; i++){
        for (int j = 0; j < bins; j++){
            // Each record ends with a real newline escape so that one record
            // is printed per line.
            printf("Length %i, bin %i, val %i\n", i, j, gpuClass->MyModels[i]->nBins);
            //what I want to access
            printf("Length %i, bin %i, val %f\n", i, j, gpuClass->MyModels[i]->bins[j]);
        }
    }
    printf("Exit Kernel\n");
}
The class structures are the following:
double * bins; //thing i want to access
class LikelihoodConstructor{//class to go to GPU
<code>
class DataModel{
public :
int nBins;
double * bins; //thing i want to access
};
class LikelihoodConstructor{//class to go to GPU
public:
DataModel** MyModels;
};
</code>
// One data model: a bin count plus a pointer to the bin contents.
// Every member is public; the member order is kept as-is because objects of
// this type are copied byte-wise between host and device.
struct DataModel {
    int nBins;      // number of entries reachable through `bins`
    double * bins;  // thing i want to access
};
// Top-level object handed to the GPU: holds an array of pointers to DataModel.
// Its single member is public.
struct LikelihoodConstructor { //class to go to GPU
    DataModel ** MyModels;  // MyModels[i] points at the i-th DataModel
};
The main function is the following:
std::uniform_real_distribution<> dis(-2,2);
//Initiate object on the CPU
LikelihoodConstructor cpuClass;
cpuClass.MyModels = new DataModel*[length];
for(int i = 0; i < length; ++i) {
cpuClass.MyModels[i] = new DataModel;
cpuClass.MyModels[i]->nBins = nBins;
cpuClass.MyModels[i]->bins = new double[nBins];
for (int j =0; j<nBins; j++){
cpuClass.MyModels[i]->bins[j] = dis(gen);
} // so we can prove that things are working
//Allocate storage for object onto GPU and copy host object to device
LikelihoodConstructor * gpuClass;
cudaMalloc(&gpuClass,sizeof(LikelihoodConstructor));
cudaMemcpy(gpuClass,&cpuClass,sizeof(LikelihoodConstructor),cudaMemcpyHostToDevice);
d_models = new DataModel*[length];
for(int i = 0; i < length; ++i) {
//allocate memory for each data model
cudaMalloc(&d_models[i],sizeof(DataModel));
cudaCheckErrors("cudaMalloc failure"); // error checking
cudaMemcpy(d_models[i], cpuClass.MyModels[i],sizeof(DataModel),cudaMemcpyHostToDevice);
cudaCheckErrors("cudaMemcpy H2D failure");
//This is the bit Im trying to figure out
//allocate memory for the array in d_models[i]->bins
cudaMalloc(&(d_models[i]->bins), nBins*sizeof(double));
cudaCheckErrors("cudaMalloc failure"); // error checking
//copy the bin content over... **This fails to run**
cudaMemcpy(d_models[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);
// cudaCheckErrors("cudaMemcpy H2D failure");
//allocate the top level pointers
cudaMalloc(&td_models, length * sizeof(DataModel *));
cudaMemcpy(td_models, d_models, length * sizeof(DataModel *), cudaMemcpyHostToDevice);
//copy *pointer value* of td_par to appropriate location in top level object
cudaMemcpy(&(gpuClass->MyModels), &(td_models), sizeof(DataModel **), cudaMemcpyHostToDevice);
test_kernel<<<1,1>>>(gpuClass,length, nBins);
cudaCheckErrors("kernel launch failure");
for(int i = 0; i < length; ++i) {
<code>int main(){
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<> dis(-2,2);
int length = 2;
int nBins = 5;
//Initiate object on the CPU
LikelihoodConstructor cpuClass;
cpuClass.MyModels = new DataModel*[length];
for(int i = 0; i < length; ++i) {
cpuClass.MyModels[i] = new DataModel;
cpuClass.MyModels[i]->nBins = nBins;
cpuClass.MyModels[i]->bins = new double[nBins];
for (int j =0; j<nBins; j++){
cpuClass.MyModels[i]->bins[j] = dis(gen);
}
} // so we can prove that things are working
//Allocate storage for object onto GPU and copy host object to device
LikelihoodConstructor * gpuClass;
cudaMalloc(&gpuClass,sizeof(LikelihoodConstructor));
cudaMemcpy(gpuClass,&cpuClass,sizeof(LikelihoodConstructor),cudaMemcpyHostToDevice);
DataModel ** d_models;
d_models = new DataModel*[length];
for(int i = 0; i < length; ++i) {
//allocate memory for each data model
cudaMalloc(&d_models[i],sizeof(DataModel));
cudaCheckErrors("cudaMalloc failure"); // error checking
//copy models over
cudaMemcpy(d_models[i], cpuClass.MyModels[i],sizeof(DataModel),cudaMemcpyHostToDevice);
cudaCheckErrors("cudaMemcpy H2D failure");
//This is the bit Im trying to figure out
//allocate memory for the array in d_models[i]->bins
cudaMalloc(&(d_models[i]->bins), nBins*sizeof(double));
cudaCheckErrors("cudaMalloc failure"); // error checking
//copy the bin content over... **This fails to run**
cudaMemcpy(d_models[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);
// cudaCheckErrors("cudaMemcpy H2D failure");
}
DataModel ** td_models;
//allocate the top level pointers
cudaMalloc(&td_models, length * sizeof(DataModel *));
cudaMemcpy(td_models, d_models, length * sizeof(DataModel *), cudaMemcpyHostToDevice);
//copy *pointer value* of td_par to appropriate location in top level object
cudaMemcpy(&(gpuClass->MyModels), &(td_models), sizeof(DataModel **), cudaMemcpyHostToDevice);
test_kernel<<<1,1>>>(gpuClass,length, nBins);
cudaCheckErrors("kernel launch failure");
cudaDeviceSynchronize();
//clean up
for(int i = 0; i < length; ++i) {
cudaFree(d_models[i]);
cudaFree(d_par[i]);
free(h_test[i]);
}
//free top level
cudaFree(td_models);
cudaFree(td_par);
cudaFree(gpuClass);
free(h_test);
return 0;
</code>
// Builds a LikelihoodConstructor on the host, deep-copies it to the device
// (top-level object -> array of DataModel pointers -> each DataModel -> its
// bins array), runs test_kernel on the copy, then releases everything.
//
// Rule followed throughout: a pointer returned by cudaMalloc is never
// dereferenced on the host. Every device address is kept in a host variable,
// and nested pointers are fixed up on the host *before* the enclosing object
// is copied to the device.
int main(){
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(-2,2);
    int length = 2;
    int nBins = 5;

    //Initiate object on the CPU
    LikelihoodConstructor cpuClass;
    cpuClass.MyModels = new DataModel*[length];
    for(int i = 0; i < length; ++i) {
        cpuClass.MyModels[i] = new DataModel;
        cpuClass.MyModels[i]->nBins = nBins;
        cpuClass.MyModels[i]->bins = new double[nBins];
        for (int j = 0; j < nBins; j++){
            cpuClass.MyModels[i]->bins[j] = dis(gen);
        }
    } // so we can prove that things are working

    //Allocate storage for object onto GPU and copy host object to device.
    //The MyModels member copied here still holds a host address; it is
    //overwritten with the device address further down.
    LikelihoodConstructor * gpuClass;
    cudaMalloc(&gpuClass, sizeof(LikelihoodConstructor));
    cudaCheckErrors("cudaMalloc failure");
    cudaMemcpy(gpuClass, &cpuClass, sizeof(LikelihoodConstructor), cudaMemcpyHostToDevice);
    cudaCheckErrors("cudaMemcpy H2D failure");

    //Host-resident arrays that hold *device* addresses.
    DataModel ** d_models = new DataModel*[length]; // device DataModel objects
    double ** d_bins = new double*[length];         // device bins arrays

    for(int i = 0; i < length; ++i) {
        //allocate the device bins array and copy the bin content over.
        //The device address lives in d_bins[i] (host memory), so no device
        //pointer has to be dereferenced on the host to obtain it.
        cudaMalloc(&d_bins[i], nBins * sizeof(double));
        cudaCheckErrors("cudaMalloc failure");
        cudaMemcpy(d_bins[i], cpuClass.MyModels[i]->bins, nBins * sizeof(double), cudaMemcpyHostToDevice);
        cudaCheckErrors("cudaMemcpy H2D failure");

        //stage a host copy of the model whose bins member already holds the
        //device address, then copy that staged object to the device.
        DataModel staged = *cpuClass.MyModels[i];
        staged.bins = d_bins[i];

        //allocate memory for each data model and copy the staged model over
        cudaMalloc(&d_models[i], sizeof(DataModel));
        cudaCheckErrors("cudaMalloc failure");
        cudaMemcpy(d_models[i], &staged, sizeof(DataModel), cudaMemcpyHostToDevice);
        cudaCheckErrors("cudaMemcpy H2D failure");
    }

    DataModel ** td_models;
    //allocate the top level pointers
    cudaMalloc(&td_models, length * sizeof(DataModel *));
    cudaCheckErrors("cudaMalloc failure");
    cudaMemcpy(td_models, d_models, length * sizeof(DataModel *), cudaMemcpyHostToDevice);
    cudaCheckErrors("cudaMemcpy H2D failure");

    //copy *pointer value* of td_models to appropriate location in top level object.
    //&(gpuClass->MyModels) only computes the member's device address; it does
    //not read device memory on the host.
    cudaMemcpy(&(gpuClass->MyModels), &(td_models), sizeof(DataModel **), cudaMemcpyHostToDevice);
    cudaCheckErrors("cudaMemcpy H2D failure");

    test_kernel<<<1,1>>>(gpuClass, length, nBins);
    cudaCheckErrors("kernel launch failure");
    cudaDeviceSynchronize();
    cudaCheckErrors("kernel execution failure");

    //clean up device allocations, innermost first
    for(int i = 0; i < length; ++i) {
        cudaFree(d_bins[i]);
        cudaFree(d_models[i]);
    }
    //free top level
    cudaFree(td_models);
    cudaFree(gpuClass);

    //clean up host allocations (all made with new / new[])
    for(int i = 0; i < length; ++i) {
        delete[] cpuClass.MyModels[i]->bins;
        delete cpuClass.MyModels[i];
    }
    delete[] cpuClass.MyModels;
    delete[] d_models;
    delete[] d_bins;
    return 0;
}
The code compiles fine but fails at run time when accessing gpuClass->MyModels[i]->bins[j]:
<code>compute-sanitizer --leak-check full ./main
========= COMPUTE-SANITIZER
========= Error: process didn't terminate successfully
========= Target application returned an error
========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
========= ERROR SUMMARY: 0 errors
<code>compute-sanitizer --leak-check full ./main
========= COMPUTE-SANITIZER
========= Error: process didn't terminate successfully
========= Target application returned an error
========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
========= ERROR SUMMARY: 0 errors
</code>
compute-sanitizer --leak-check full ./main
========= COMPUTE-SANITIZER
========= Error: process didn't terminate successfully
========= Target application returned an error
========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
========= ERROR SUMMARY: 0 errors
Ultimately, I can't quite figure out how to link the gpuClass to the DataModel bins on the device.
I've used this post to get most of the way, but I just can't find a way to access the bins.
I've also tried:
<code>for(int i = 0; i < length; ++i) {
cudaMemcpy(gpuClass->MyModels[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);
<code>for(int i = 0; i < length; ++i) {
cudaMemcpy(gpuClass->MyModels[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);
}
</code>
// Attempted alternative for copying the bin contents to the device.
// NOTE: gpuClass is the address obtained from cudaMalloc, so evaluating
// gpuClass->MyModels[i]->bins here makes the host read through a device
// pointer just to compute the destination argument; this cannot work from
// host code. The destination address has to come from a host-held variable.
for(int i = 0; i < length; ++i) {
cudaMemcpy(gpuClass->MyModels[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);
}