I am working on a clustering application and I need to compute the cluster label for each point based on its neighbouring points. I am new to GPU programming in CUDA and I am running my application on NVIDIA's Grace Hopper processor.
My code snippet is as follows.
Device code
/**
 * Region query for a single point: tests every candidate neighbour against
 * the squared radius EPS2 and folds the labels of in-range core neighbours
 * into one minimum.
 *
 * Launch layout: 1D grid, one thread per candidate (thread i handles
 * neighboring_points[i]); threads with i >= n do nothing. No shared memory.
 *
 * @param point_index        row of the query point in `dataset`
 * @param neighboring_points candidate neighbour rows, n entries
 * @param clusters           label per point, indexed by point row; a negative
 *                           value is treated as "labelled core point"
 * @param n                  number of entries in `neighboring_points`
 * @param EPS2               squared distance threshold (compared against the
 *                           squared Euclidean distance, no sqrt is taken)
 * @param min_points_area    output, n entries; slot i receives
 *                           neighboring_points[i] when that neighbour is in
 *                           range. Slots of out-of-range neighbours are NOT
 *                           written, so the caller must pre-fill the buffer
 *                           with a sentinel and compact it afterwards.
 * @param dataset            point coordinates, `dimensions` consecutive
 *                           floats per point
 * @param dimensions         number of coordinates per point
 * @param cluster_label      in/out single value; must be seeded by the caller
 *                           with the (positive) fallback label. On return it
 *                           holds min(seed, |label| of every in-range core
 *                           neighbour).
 */
__global__ void regional_query_cuda( const int32_t point_index,
                                     const int32_t* neighboring_points,
                                     const int32_t* clusters,
                                     const int32_t n,
                                     const float EPS2,
                                     int32_t* min_points_area,
                                     const float* dataset,
                                     const size_t dimensions,
                                     int32_t* cluster_label
) {
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        // Load the neighbour index once instead of re-reading global memory.
        const int32_t neighbor = neighboring_points[i];

        // Row starts are loop-invariant, so compute them outside the loop.
        const float* point = dataset + point_index * dimensions;
        const float* other = dataset + neighbor * dimensions;

        // Squared Euclidean distance between the query point and the neighbour.
        float offsets = 0.0f;
        for (size_t d = 0; d < dimensions; ++d) {
            const float distance = point[d] - other[d];
            offsets += distance * distance;
        }

        // .. if in range, record the neighbour in its own slot
        if (offsets <= EPS2) {
            min_points_area[i] = neighbor;

            const int32_t nb_label = clusters[neighbor];
            // A negative label marks a labelled core point. The sign is only
            // the core flag, so compare the magnitude: folding the raw
            // negative value against the positive seed would always discard
            // the seed and select the LARGEST magnitude instead of the
            // smallest label.
            // NOTE(review): assumes labels never equal INT32_MIN (negation
            // would overflow) - confirm against the label range.
            if (nb_label < 0) {
                atomicMin(cluster_label, -nb_label);
            }
        }
    }
}
I used the Thrust library as it is easier to compute reductions. My host code is as follows:
// Fallback label for this point; the kernel lowers it if an in-range core
// neighbour carries a smaller label.
int32_t cluster_label = m_global_point_offset + point_index + 1;
int32_t blockSize = 256;
int32_t numBlocks = (n + blockSize - 1) / blockSize;  // ceil(n / blockSize)
// One-element device buffer seeded with the fallback label; the kernel
// updates it in place with atomicMin.
thrust::device_vector<int32_t> d_cluster_label(1, cluster_label);
// A grid of zero blocks is an invalid launch configuration, so skip the launch
// when there are no candidates; cluster_label then keeps its seed value.
if (numBlocks > 0) {
    regional_query_cuda<<<numBlocks, blockSize>>>(point_index, neighboring_points, clusters, n, EPS2,
                                                  min_points_area,
                                                  dataset, dimensions, thrust::raw_pointer_cast(d_cluster_label.data()));
    // A kernel launch returns nothing: configuration errors are only visible
    // through cudaGetLastError(), execution faults through the next sync.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        // Same exception type Thrust itself raises when the read-back below
        // hits a CUDA error, so callers see one failure channel.
        throw thrust::system_error(status, thrust::cuda_category(), "regional_query_cuda failed");
    }
    // Copies the single result element back to the host.
    cluster_label = d_cluster_label[0];
}
The results are incorrect, whereas I get the correct result when I run the code entirely on the CPU. Is there something missing? Just to reiterate, `cluster_label` needs to hold the smallest label among all the points that are within `EPS2` of the point denoted by `point_index`. The array `clusters` contains the cluster labels of all the points, and `n` is the total number of points.