I want to implement an inverse FFT (IFFT) with CUDA. The input data is complex: the real parts are imported from `phase_init_before_R.csv` and the imaginary parts are imported from `phase_init_before_C.csv`. I then run the `cufftExecC2C` or the `cufftExecZ2Z` function. The result is saved in `out_R.csv` and `out_C.csv`; `out_R.csv` is what I want.
When the macro `SINGLE` is defined, all the data is single-precision. Here is the result drawn by the Matlab command `imagesc(csvread("out_R.csv"))`:
All files are uploaded to Google Drive. Here is the link: https://drive.google.com/drive/folders/1aQZOxVAFe82W9LyvYzfgmfkas8vsqC2Q?usp=sharing
When the macro `SINGLE` is not defined, all the data is double-precision. Here is the result drawn by the Matlab command `imagesc(csvread("out_R.csv"))`:
I verified the result using Matlab. Whether the data is single- or double-precision in Matlab, the result is the same, and it is similar to the second image.
Why does single-precision data lead to a wrong `out_R.csv`?
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>

#include <cuda_runtime.h>
#include <cufft.h>
// Linker directive requesting cufft.lib (the `#pragma comment(lib, ...)` form
// is a Microsoft-toolchain feature; other toolchains need the library on the
// link line instead).
#pragma comment(lib,"cufft.lib")
// Dimensions of the 2-D data set: ROW rows of COL values each. The same
// sizes are used for the CSV files and for the 2-D cuFFT plan.
#define ROW 120
#define COL 160
using namespace std;
// change precision between single and double
// Uncomment the next line to build the single-precision variant.
//#define SINGLE
// REAL is the scalar type read from the CSV files; COMPLEX is the matching
// cuFFT complex type whose .x / .y members hold the real / imaginary parts.
#ifdef SINGLE
#define REAL cufftReal
#define COMPLEX cufftComplex
#else
#define REAL cufftDoubleReal
#define COMPLEX cufftDoubleComplex
#endif
// read file csv file
template <typename T>
void readData(T* inputData, const char* fileName)
{
ifstream file;
file.open(fileName);
if(file.is_open())
{
for(int i = 0; i<ROW*COL; ++i)
{
file>>*inputData++;
file.get();
}
file.close();
}
else{
printf("Can not open file %s", fileName);
}
}
// Write one component of a ROW x COL complex array to a CSV file.
//
// Parameters:
//   inputData  - ROW*COL elements in row-major order; each element must
//                expose .x (real part) and .y (imaginary part).
//   fileName   - path of the file to create / overwrite.
//   realOrImag - true writes the .x members, false writes the .y members.
//
// Values in a row are separated by ',' and each row ends with a newline.
// Numbers are written with the stream's default formatting. If the file
// cannot be opened a message is written to stderr and nothing is saved.
template<typename T>
void saveData(T* inputData, const char* fileName, bool realOrImag)
{
    ofstream file(fileName);
    if (!file.is_open())
    {
        fprintf(stderr, "Can not open file %s\n", fileName);
        return;
    }

    for (int i = 0; i < ROW; ++i)
    {
        for (int j = 0; j < COL; ++j)
        {
            const T& element = inputData[i * COL + j];
            file << (realOrImag ? element.x : element.y);
            // Comma between values, newline after the last value of a row.
            file << (j < COL - 1 ? ',' : '\n');
        }
    }
}
// Print the top-left 6 x 3 corner of a complex array to stdout for a quick
// visual check.
//
// Parameters:
//   inputData - row-major array with COL elements per row; each element must
//               expose .x and .y. Rows 0..5 and columns 0..2 are read, so the
//               array must have at least 6 rows and 3 columns.
//
// Each element is printed as "(x, y)" followed by a tab; each row ends with a
// newline.
template<typename T>
void showData(T* inputData)
{
    for (int i = 0; i < 6; ++i)
    {
        for (int j = 0; j < 3; ++j)
        {
            const T& element = inputData[i * COL + j];
            printf("(%.3f, %.3f)\t", element.x, element.y);
        }
        printf("\n");
    }
}
int main() {
// read data from csv file
REAL *real, *imag;
real = (REAL*)malloc(ROW*COL*sizeof(REAL));
imag = (REAL*)malloc(ROW*COL*sizeof(REAL));
readData(real, R"(C:UsersadminDocumentsMATLABphase_init_before_R.csv)");
readData(imag, R"(C:UsersadminDocumentsMATLABphase_init_before_C.csv)");
COMPLEX *in;
in = (COMPLEX*)malloc(ROW*COL*sizeof(COMPLEX));
for(int m = 0; m<ROW*COL; ++m)
{
in[m] = {real[m], imag[m]};
}
// copy form host to device
COMPLEX *in_dev;
cudaMalloc(&in_dev, ROW*COL*sizeof(COMPLEX));
cudaMemcpy(in_dev, in, ROW*COL*sizeof(COMPLEX), cudaMemcpyHostToDevice);
// IFFT
#ifdef SINGLE
cufftHandle handle;
cufftPlan2d(&handle, ROW, COL, CUFFT_C2C);
cufftExecC2C(handle, in_dev, in_dev, CUFFT_INVERSE);
#else
cufftHandle handle;
cufftPlan2d(&handle, ROW, COL, CUFFT_Z2Z);
cufftExecZ2Z(handle, in_dev, in_dev, CUFFT_INVERSE);
#endif
// copy from device to host
COMPLEX *out;
out = (COMPLEX*)malloc(ROW*COL*sizeof(COMPLEX));
cudaMemcpy(out, in_dev, ROW*COL*sizeof(COMPLEX), cudaMemcpyDeviceToHost);
// save to csv file
saveData(out, R"(C:UsersadminDocumentsMATLABout_R.csv)", true);
saveData(out, R"(C:UsersadminDocumentsMATLABout_C.csv)", false);
cudaFree(in_dev);
free(in);
free(out);
free(real);
free(imag);
cufftDestroy(handle);
return 0;
}
At first, to make it run fast, all the data was in single precision, but the result was wrong. I checked the syntax and it is correct. So I changed the data type from single precision to double precision, and that happened to produce the correct answer.
8