I have this a.cu file:
// test42 is defined in a different translation unit (b.cu). Calling an
// external __device__ function requires this file to be compiled as
// relocatable device code (nvcc -rdc=true / -dc) so the device linker can
// resolve the symbol; otherwise ptxas reports it as an unresolved extern.
extern "C" __device__ int test42();

// Kernel: stores the value returned by test42() at the address toStore.
// Every launched thread writes to the same location, so the host launches it
// with a single thread (<<<1, 1>>>).
__global__ void get42(int* toStore) {
    const int value = test42();
    *toStore = value;
}
// Host entry point: runs get42 on one thread and copies the result back.
// Returns 0 when every CUDA call succeeds and the kernel stored 42 (the value
// test42() is expected to produce); returns 1 otherwise.
int main() {
    int host{0};
    int* device = nullptr;

    // Every runtime call returns a cudaError_t; ignoring it hides failures.
    cudaError_t status = cudaMalloc(&device, sizeof(int));
    if (status != cudaSuccess) {
        return 1;
    }

    get42<<<1, 1>>>(device);
    // A kernel launch returns nothing, so launch errors are fetched explicitly.
    status = cudaGetLastError();
    if (status == cudaSuccess) {
        // Surfaces errors raised while the kernel was executing.
        status = cudaDeviceSynchronize();
    }
    if (status == cudaSuccess) {
        status = cudaMemcpy(&host, device, sizeof(int), cudaMemcpyDeviceToHost);
    }

    // Release the device allocation on every path past a successful cudaMalloc.
    cudaFree(device);

    return (status == cudaSuccess && host == 42) ? 0 : 1;
}
And this b.cu file:
// Device function with C linkage (unmangled symbol name "test42") that
// always returns 42.
extern "C" __device__ int test42() {
    constexpr int kAnswer = 42;
    return kAnswer;
}
I compiled b.cu into a fatbinary stored at somepath/b.fatbin.
I have also linked both into an object file:
nvcc -arch=sm_60 -dlink -o c.o somepath/b.fatbin a.cu
The object dump clearly shows both functions defined:
$: cuobjdump -sass c.o
Fatbin elf code:
================
arch = sm_60
code version = [1,7]
host = linux
compile_size = 64bit
code for sm_60
Function : test42
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM60 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM60)"
/* 0x001ffc00ffe007f0 */
/*0008*/ MOV32I R4, 0x2a ; /* 0x0100000002a7f004 */
/*0010*/ RET ; /* 0xe32000000007000f */
/*0018*/ BRA 0x18 ; /* 0xe2400fffff87000f */
/* 0x001f8000fc0007e0 */
/*0028*/ NOP; /* 0x50b0000000070f00 */
/*0030*/ NOP; /* 0x50b0000000070f00 */
/*0038*/ NOP; /* 0x50b0000000070f00 */
..........
Function : _Z5get42Pi
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM60 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM60)"
/* 0x001fc400ffa007f6 */
/*0008*/ MOV R1, c[0x0][0x20] ; /* 0x4c98078000870001 */
/*0010*/ JCAL 0x0 ; /* 0xe220000000000040 */
/*0018*/ MOV R2, c[0x0][0x140] ; /* 0x4c98078005070002 */
/* 0x001fbc00fe2007f2 */
/*0028*/ MOV R3, c[0x0][0x144] ; /* 0x4c98078005170003 */
/*0030*/ STG.E [R2], R4 ; /* 0xeedc200000070204 */
/*0038*/ NOP ; /* 0x50b0000000070f00 */
/* 0x001ffc00fc6007ef */
/*0048*/ NOP ; /* 0x50b0000000070f00 */
/*0050*/ NOP ; /* 0x50b0000000070f00 */
/*0058*/ EXIT ; /* 0xe30000000007000f */
/* 0x001f8000fc0007ff */
/*0068*/ BRA 0x60 ; /* 0xe2400fffff07000f */
/*0070*/ NOP; /* 0x50b0000000070f00 */
/*0078*/ NOP; /* 0x50b0000000070f00 */
When I try to build an executable:
nvcc -arch=sm_60 -o executable a.cu c.o -lcudadevrt
I get:
ptxas fatal : Unresolved extern function 'test42'
What am I missing?