Thiết kế website giá rẻ

Question

I am trying to copy a class object containing pointers to another. In particular, I have a class LikelihoodConstructor which contains an array of pointers to another class, DataModel which contains an array ‘bins’ which im trying to access. essentially in the kernel I would like to run is the following :

<code>__global__ void test_kernel(LikelihoodConstructor * gpuClass, int length, int bins){

for (int i =0; i< length;i++){

for (int j = 0;j <bins; j++){

printf("Length %i, bin %i, val %in",i,j,gpuClass->MyModels[i]->nBins);

//what I want to access

printf("Length %i, bin %i, val %fn",i,j,gpuClass->MyModels[i]->bins[j]);

}

printf("Exit Kerneln");

}

</code>

<code>__global__ void test_kernel(LikelihoodConstructor * gpuClass, int length, int bins){ for (int i =0; i< length;i++){ for (int j = 0;j <bins; j++){ printf("Length %i, bin %i, val %in",i,j,gpuClass->MyModels[i]->nBins); //what I want to access printf("Length %i, bin %i, val %fn",i,j,gpuClass->MyModels[i]->bins[j]); } } printf("Exit Kerneln"); } </code>

__global__ void test_kernel(LikelihoodConstructor * gpuClass, int length, int bins){
    
    for (int i =0; i< length;i++){
        for (int j = 0;j <bins; j++){
            printf("Length %i, bin %i, val %in",i,j,gpuClass->MyModels[i]->nBins);
            
            //what I want to access
            printf("Length %i, bin %i, val %fn",i,j,gpuClass->MyModels[i]->bins[j]);
        }
    }
    printf("Exit Kerneln");   
}

The class structures are the following

<code>

class DataModel{

public :

int nBins;

double * bins; //thing i want to access

};

class LikelihoodConstructor{//class to go to GPU

public:

DataModel** MyModels;

};

</code>

<code> class DataModel{ public : int nBins; double * bins; //thing i want to access }; class LikelihoodConstructor{//class to go to GPU public: DataModel** MyModels; }; </code>


class DataModel{
    public :
        int nBins;
        double * bins; //thing i want to access
};

class LikelihoodConstructor{//class  to go to GPU
    public:
        DataModel** MyModels;
   
     
};

The main in the following

<code>int main(){

std::random_device rd;

std::mt19937 gen(rd());

std::uniform_real_distribution<> dis(-2,2);

int length = 2;

int nBins = 5;

//Initiate object on the CPU

LikelihoodConstructor cpuClass;

cpuClass.MyModels = new DataModel*[length];

for(int i = 0; i < length; ++i) {

cpuClass.MyModels[i] = new DataModel;

cpuClass.MyModels[i]->nBins = nBins;

cpuClass.MyModels[i]->bins = new double[nBins];

for (int j =0; j<nBins; j++){

cpuClass.MyModels[i]->bins[j] = dis(gen);

}

} // so we can prove that things are working

//Allocate storage for object onto GPU and copy host object to device

LikelihoodConstructor * gpuClass;

cudaMalloc(&gpuClass,sizeof(LikelihoodConstructor));

cudaMemcpy(gpuClass,&cpuClass,sizeof(LikelihoodConstructor),cudaMemcpyHostToDevice);

DataModel ** d_models;

d_models = new DataModel*[length];

for(int i = 0; i < length; ++i) {

//allocate memory for each data model

cudaMalloc(&d_models[i],sizeof(DataModel));

cudaCheckErrors("cudaMalloc failure"); // error checking

//copy models over

cudaMemcpy(d_models[i], cpuClass.MyModels[i],sizeof(DataModel),cudaMemcpyHostToDevice);

cudaCheckErrors("cudaMemcpy H2D failure");

//This is the bit Im trying to figure out

//allocate memory for the array in d_models[i]->bins

cudaMalloc(&(d_models[i]->bins), nBins*sizeof(double));

cudaCheckErrors("cudaMalloc failure"); // error checking

//copy the bin content over... **This fails to run**

cudaMemcpy(d_models[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);

// cudaCheckErrors("cudaMemcpy H2D failure");

}

DataModel ** td_models;

//allocate the top level pointers

cudaMalloc(&td_models, length * sizeof(DataModel *));

cudaMemcpy(td_models, d_models, length * sizeof(DataModel *), cudaMemcpyHostToDevice);

//copy *pointer value* of td_par to appropriate location in top level object

cudaMemcpy(&(gpuClass->MyModels), &(td_models), sizeof(DataModel **), cudaMemcpyHostToDevice);

test_kernel<<<1,1>>>(gpuClass,length, nBins);

cudaCheckErrors("kernel launch failure");

cudaDeviceSynchronize();

//clean up

for(int i = 0; i < length; ++i) {

cudaFree(d_models[i]);

cudaFree(d_par[i]);

free(h_test[i]);

}

//free top level

cudaFree(td_models);

cudaFree(td_par);

cudaFree(gpuClass);

free(h_test);

return 0;

</code>

<code>int main(){ std::random_device rd; std::mt19937 gen(rd()); std::uniform_real_distribution<> dis(-2,2); int length = 2; int nBins = 5; //Initiate object on the CPU LikelihoodConstructor cpuClass; cpuClass.MyModels = new DataModel*[length]; for(int i = 0; i < length; ++i) { cpuClass.MyModels[i] = new DataModel; cpuClass.MyModels[i]->nBins = nBins; cpuClass.MyModels[i]->bins = new double[nBins]; for (int j =0; j<nBins; j++){ cpuClass.MyModels[i]->bins[j] = dis(gen); } } // so we can prove that things are working //Allocate storage for object onto GPU and copy host object to device LikelihoodConstructor * gpuClass; cudaMalloc(&gpuClass,sizeof(LikelihoodConstructor)); cudaMemcpy(gpuClass,&cpuClass,sizeof(LikelihoodConstructor),cudaMemcpyHostToDevice); DataModel ** d_models; d_models = new DataModel*[length]; for(int i = 0; i < length; ++i) { //allocate memory for each data model cudaMalloc(&d_models[i],sizeof(DataModel)); cudaCheckErrors("cudaMalloc failure"); // error checking //copy models over cudaMemcpy(d_models[i], cpuClass.MyModels[i],sizeof(DataModel),cudaMemcpyHostToDevice); cudaCheckErrors("cudaMemcpy H2D failure"); //This is the bit Im trying to figure out //allocate memory for the array in d_models[i]->bins cudaMalloc(&(d_models[i]->bins), nBins*sizeof(double)); cudaCheckErrors("cudaMalloc failure"); // error checking //copy the bin content over... **This fails to run** cudaMemcpy(d_models[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice); // cudaCheckErrors("cudaMemcpy H2D failure"); } DataModel ** td_models; //allocate the top level pointers cudaMalloc(&td_models, length * sizeof(DataModel *)); cudaMemcpy(td_models, d_models, length * sizeof(DataModel *), cudaMemcpyHostToDevice); //copy *pointer value* of td_par to appropriate location in top level object cudaMemcpy(&(gpuClass->MyModels), &(td_models), sizeof(DataModel **), cudaMemcpyHostToDevice); test_kernel<<<1,1>>>(gpuClass,length, nBins); cudaCheckErrors("kernel launch failure"); cudaDeviceSynchronize(); //clean up for(int i = 0; i < length; ++i) { cudaFree(d_models[i]); cudaFree(d_par[i]); free(h_test[i]); } //free top level cudaFree(td_models); cudaFree(td_par); cudaFree(gpuClass); free(h_test); return 0; </code>

int main(){
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(-2,2);
    int length = 2;
    int nBins  = 5;

    //Initiate object on the CPU
    LikelihoodConstructor cpuClass;
    cpuClass.MyModels = new DataModel*[length];

    for(int i = 0; i < length; ++i) {
        cpuClass.MyModels[i] = new DataModel;
        cpuClass.MyModels[i]->nBins = nBins;
        cpuClass.MyModels[i]->bins = new double[nBins];
        for (int j =0; j<nBins; j++){
            cpuClass.MyModels[i]->bins[j] = dis(gen);
        }
    } // so we can prove that things are working

    //Allocate storage for object onto GPU and copy host object to device
    LikelihoodConstructor * gpuClass;
    cudaMalloc(&gpuClass,sizeof(LikelihoodConstructor));
    cudaMemcpy(gpuClass,&cpuClass,sizeof(LikelihoodConstructor),cudaMemcpyHostToDevice);
    
    DataModel ** d_models;
    d_models = new DataModel*[length];

    for(int i = 0; i < length; ++i) {
        //allocate memory for each data model
        cudaMalloc(&d_models[i],sizeof(DataModel));
        cudaCheckErrors("cudaMalloc failure"); // error checking
        
        //copy models over
        cudaMemcpy(d_models[i], cpuClass.MyModels[i],sizeof(DataModel),cudaMemcpyHostToDevice);
        cudaCheckErrors("cudaMemcpy H2D failure");
        //This is the bit Im trying to figure out
        //allocate memory for the array in d_models[i]->bins
        cudaMalloc(&(d_models[i]->bins), nBins*sizeof(double));
        cudaCheckErrors("cudaMalloc failure"); // error checking

        //copy the bin content over... **This fails to run**
        cudaMemcpy(d_models[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);
        // cudaCheckErrors("cudaMemcpy H2D failure");
    }

    DataModel ** td_models;
    //allocate the top level pointers
    cudaMalloc(&td_models, length * sizeof(DataModel *));
    cudaMemcpy(td_models, d_models, length * sizeof(DataModel *), cudaMemcpyHostToDevice);
    //copy *pointer value* of td_par to appropriate location in top level object
    cudaMemcpy(&(gpuClass->MyModels), &(td_models), sizeof(DataModel **), cudaMemcpyHostToDevice);

    test_kernel<<<1,1>>>(gpuClass,length, nBins);
    cudaCheckErrors("kernel launch failure");
    cudaDeviceSynchronize();

    //clean up
    for(int i = 0; i < length; ++i) {
        cudaFree(d_models[i]);
        cudaFree(d_par[i]);
        free(h_test[i]);
    }
    //free top level
    cudaFree(td_models);
    cudaFree(td_par);
    cudaFree(gpuClass);
    free(h_test);

    return 0;

Code Compiles fine but fails when accessing gpuClass->MyModels[i]->bins[j]

<code>compute-sanitizer --leak-check full ./main

========= COMPUTE-SANITIZER

========= Error: process didn't terminate successfully

========= Target application returned an error

========= LEAK SUMMARY: 0 bytes leaked in 0 allocations

========= ERROR SUMMARY: 0 errors

</code>

<code>compute-sanitizer --leak-check full ./main ========= COMPUTE-SANITIZER ========= Error: process didn't terminate successfully ========= Target application returned an error ========= LEAK SUMMARY: 0 bytes leaked in 0 allocations ========= ERROR SUMMARY: 0 errors </code>

compute-sanitizer --leak-check full ./main
========= COMPUTE-SANITIZER
========= Error: process didn't terminate successfully
========= Target application returned an error
========= LEAK SUMMARY: 0 bytes leaked in 0 allocations
========= ERROR SUMMARY: 0 errors

Ultimately I cant quite figure out how to link the gpuClass to the DataModel Bins on the device.

Ive used this post to get most of the way but i just cant find a way to access the bins.

Ive also tried:

<code>for(int i = 0; i < length; ++i) {

cudaMemcpy(gpuClass->MyModels[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);

}

</code>

<code>for(int i = 0; i < length; ++i) { cudaMemcpy(gpuClass->MyModels[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice); } </code>

for(int i = 0; i < length; ++i) {
        cudaMemcpy(gpuClass->MyModels[i]->bins, cpuClass.MyModels[i]->bins,nBins*sizeof(double),cudaMemcpyHostToDevice);
    }

Thiết kế website giá rẻ

Danh mục

CUDA copy class object containg pointer to another class