I am implementing a parallel two-dimensional SPH (smoothed-particle hydrodynamics) method, and I have a problem with efficiently copying a three-dimensional array to the device.
I partially solved the problem by flattening the three-dimensional array into a one-dimensional one and copying that whole array to the device. To do this, however, I had to make the third dimension a constant, because I could not figure out how to handle it otherwise: the number of particles in a grid cell differs from cell to cell. As a result, a lot of memory is now allocated on the device that is never actually used.
Here is my grid code, the copy-to-device functions, and how I calculate the index into this one-dimensional array:
// Uniform cell grid used to bin particles by position.
//
// Every member has a default initializer so that a default-constructed Grid is
// in a well-defined "empty" state (no indeterminate sizes or dangling pointers).
struct Grid
{
    // Edge length of one square cell.
    double cellSize = 0.0;
    // Number of cells along the x axis.
    int gridSizeX = 0;
    // Number of cells along the y axis.
    int gridSizeY = 0;
    // Largest number of particles found in any single cell; also the allocated
    // length of every particles[i][j] array.
    int maxcount = 0;
    // particles[i][j] points to the particle slots of cell (i, j).
    Particle*** particles = nullptr;
    // count[i][j] is the number of slots of cell (i, j) that are actually filled.
    int** count = nullptr;
};
Grid createGrid(double maxSmoothingLength, const Particle* particles, int particleCount)
{
double cellSize = 2.0 * maxSmoothingLength;
int gridSizeX = static_cast<int>(ceil((b - a) / cellSize));
int gridSizeY = static_cast<int>(ceil((b - a) / cellSize));
Grid grid;
grid.cellSize = cellSize;
grid.gridSizeX = gridSizeX;
grid.gridSizeY = gridSizeY;
grid.count = new int* [gridSizeX];
for (int i = 0; i < gridSizeX; ++i)
{
grid.count[i] = new int[gridSizeY];
for (int j = 0; j < gridSizeY; ++j)
{
grid.count[i][j] = 0;
}
}
for (int k = 0; k < particleCount; ++k)
{
const Particle& particle = particles[k];
if (particle.x >= a && particle.x <= b && particle.y >= a && particle.y <= b)
{
int cellX = static_cast<int>(particle.x / cellSize);
int cellY = static_cast<int>(particle.y / cellSize);
if (cellX >= 0 && cellX < gridSizeX && cellY >= 0 && cellY < gridSizeY)
{
grid.count[cellX][cellY]++;
}
}
}
for (int i = 0; i < gridSizeX; i++)
{
for (int j = 0; j < gridSizeY; j++)
{
if (grid.count[i][j] > grid.maxcount)
grid.maxcount = grid.count[i][j];
}
}
grid.particles = new Particle **[gridSizeX];
for (int i = 0; i < gridSizeX; ++i)
{
grid.particles[i] = new Particle * [gridSizeY];
for (int j = 0; j < gridSizeY; ++j)
{
grid.particles[i][j] = new Particle[grid.maxcount];
grid.count[i][j] = 0;
}
}
for (int k = 0; k < particleCount; ++k)
{
const Particle& particle = particles[k];
if (particle.x >= a && particle.x <= b && particle.y >= a && particle.y <= b)
{
int cellX = static_cast<int>(particle.x / cellSize);
int cellY = static_cast<int>(particle.y / cellSize);
if (cellX >= 0 && cellX < gridSizeX && cellY >= 0 && cellY < gridSizeY)
{
int index = grid.count[cellX][cellY];
grid.particles[cellX][cellY][index].copyFrom(particles[k]);
grid.count[cellX][cellY]++;
}
}
}
return grid;
}
// Offset of slot k of cell (x, y) in the flattened one-dimensional array:
// every cell owns `maxcount` consecutive entries and every x index owns
// `gridSizeY` consecutive cells.
int Index = (x * gridSizeY + y) * maxcount + k;
SKENDLI is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.