I am trying to copy the variable d_output->list from device to the host using cudaMemcpy but I am obtaining Segmentation fault (core dumped) Could you please let me know why?
<code>#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdarg.h>
#ifdef __CUDACC__
#define CUDA_HOSTDEV __host__ __device__
#else
#define CUDA_HOSTDEV
#endif
//msb are at the beginning (index 0)
typedef struct {
uint8_t *list;
uint16_t bit_size;
} BitString;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %dn", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void evaluate(BitString * d_output, uint8_t * d_list) {
BitString *input;
input = (BitString *) malloc(sizeof(BitString));
input->bit_size = 3;
input->list = (uint8_t *) malloc(sizeof(uint8_t)*3);
d_output->list = (uint8_t *) malloc(sizeof(uint8_t)*3);
input->list[0] = 1;
input->list[1] = 0;
input->list[2] = 1;
d_output->bit_size = input->bit_size;
d_output = input;
d_output->list = input->list;
memcpy(d_output->list, input->list, sizeof(uint8_t)*input->bit_size);
printf("d_output->list inside kernel: %dn", d_output->list[0]);
}
int main(int argc, char *argv[]) {
BitString * output = (BitString *) malloc(sizeof(BitString));
uint8_t * list;
uint8_t * d_list;
BitString * d_output;
gpuErrchk(cudaMalloc((void **) &d_output, sizeof(BitString)));
dim3 dimblock(1, 1);
dim3 dimgrid(1, 1);
evaluate<<<dimblock,dimgrid>>>(d_output, d_list);
gpuErrchk(cudaMemcpy(output, d_output, sizeof(BitString), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(&(output->bit_size), &(d_output->bit_size), sizeof(uint16_t), cudaMemcpyDeviceToHost));
printf("output->bit_size = %dn", output->bit_size);
output->list = (uint8_t *) malloc(sizeof(uint8_t*)*3);
list = (uint8_t *) malloc(sizeof(uint8_t*)*3);
gpuErrchk(cudaMemcpy(list, d_output->list, sizeof(uint8_t*)*3, cudaMemcpyDeviceToHost));
printf("%dn", output->list[1]);
gpuErrchk(cudaFree(d_output));
free(list);
free(output->list);
free(output);
}
</code>
<code>#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdarg.h>
#ifdef __CUDACC__
#define CUDA_HOSTDEV __host__ __device__
#else
#define CUDA_HOSTDEV
#endif
//msb are at the beginning (index 0)
typedef struct {
uint8_t *list;
uint16_t bit_size;
} BitString;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %dn", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void evaluate(BitString * d_output, uint8_t * d_list) {
BitString *input;
input = (BitString *) malloc(sizeof(BitString));
input->bit_size = 3;
input->list = (uint8_t *) malloc(sizeof(uint8_t)*3);
d_output->list = (uint8_t *) malloc(sizeof(uint8_t)*3);
input->list[0] = 1;
input->list[1] = 0;
input->list[2] = 1;
d_output->bit_size = input->bit_size;
d_output = input;
d_output->list = input->list;
memcpy(d_output->list, input->list, sizeof(uint8_t)*input->bit_size);
printf("d_output->list inside kernel: %dn", d_output->list[0]);
}
int main(int argc, char *argv[]) {
BitString * output = (BitString *) malloc(sizeof(BitString));
uint8_t * list;
uint8_t * d_list;
BitString * d_output;
gpuErrchk(cudaMalloc((void **) &d_output, sizeof(BitString)));
dim3 dimblock(1, 1);
dim3 dimgrid(1, 1);
evaluate<<<dimblock,dimgrid>>>(d_output, d_list);
gpuErrchk(cudaMemcpy(output, d_output, sizeof(BitString), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(&(output->bit_size), &(d_output->bit_size), sizeof(uint16_t), cudaMemcpyDeviceToHost));
printf("output->bit_size = %dn", output->bit_size);
output->list = (uint8_t *) malloc(sizeof(uint8_t*)*3);
list = (uint8_t *) malloc(sizeof(uint8_t*)*3);
gpuErrchk(cudaMemcpy(list, d_output->list, sizeof(uint8_t*)*3, cudaMemcpyDeviceToHost));
printf("%dn", output->list[1]);
gpuErrchk(cudaFree(d_output));
free(list);
free(output->list);
free(output);
}
</code>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdarg.h>
#ifdef __CUDACC__
#define CUDA_HOSTDEV __host__ __device__
#else
#define CUDA_HOSTDEV
#endif
//msb are at the beginning (index 0)
typedef struct {
uint8_t *list;
uint16_t bit_size;
} BitString;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %dn", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__ void evaluate(BitString * d_output, uint8_t * d_list) {
BitString *input;
input = (BitString *) malloc(sizeof(BitString));
input->bit_size = 3;
input->list = (uint8_t *) malloc(sizeof(uint8_t)*3);
d_output->list = (uint8_t *) malloc(sizeof(uint8_t)*3);
input->list[0] = 1;
input->list[1] = 0;
input->list[2] = 1;
d_output->bit_size = input->bit_size;
d_output = input;
d_output->list = input->list;
memcpy(d_output->list, input->list, sizeof(uint8_t)*input->bit_size);
printf("d_output->list inside kernel: %dn", d_output->list[0]);
}
int main(int argc, char *argv[]) {
BitString * output = (BitString *) malloc(sizeof(BitString));
uint8_t * list;
uint8_t * d_list;
BitString * d_output;
gpuErrchk(cudaMalloc((void **) &d_output, sizeof(BitString)));
dim3 dimblock(1, 1);
dim3 dimgrid(1, 1);
evaluate<<<dimblock,dimgrid>>>(d_output, d_list);
gpuErrchk(cudaMemcpy(output, d_output, sizeof(BitString), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(&(output->bit_size), &(d_output->bit_size), sizeof(uint16_t), cudaMemcpyDeviceToHost));
printf("output->bit_size = %dn", output->bit_size);
output->list = (uint8_t *) malloc(sizeof(uint8_t*)*3);
list = (uint8_t *) malloc(sizeof(uint8_t*)*3);
gpuErrchk(cudaMemcpy(list, d_output->list, sizeof(uint8_t*)*3, cudaMemcpyDeviceToHost));
printf("%dn", output->list[1]);
gpuErrchk(cudaFree(d_output));
free(list);
free(output->list);
free(output);
}