I attempted to implement Dijkstra’s algorithm using CUDA. Although there are no syntax errors, when I compare the results with those from the serialized non-CUDA code, they appear to be incorrect.
It’s first time to write source code with cuda, so I think I missed something, but I don’t have any idea how to fix it.
#include <iostream>
#include <fstream>
#include <vector>
#include <cmath>
#include <cuda_runtime.h>
#include <chrono>
#include <cfloat>
#define V 18772
#define BLOCK_SIZE 256
using namespace std;
using std::chrono::high_resolution_clock;
using std::chrono::duration_cast;
using std::chrono::duration;
using std::chrono::milliseconds;
struct IntDoublePair{
int first;
double second;
};
__device__ double atomicMin_double(double* address, double val)
{
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(fmin(val, __longlong_as_double(assumed))));
} while (assumed != old);
return __longlong_as_double(old);
}
__global__ void startDijkstra(int u, double *distances, int *adjacencyList, int *edges, double *weights) {
int v = blockIdx.x * blockDim.x + threadIdx.x;
if (v == u) {
distances[v] = 0.0;
}
__syncthreads();
if (v <= V) {
int start = adjacencyList[v];
int end = adjacencyList[v + 1];
for (int i = start; i < end; ++i) {
int neighbor = edges[i];
double weight = weights[i];
if (distances[v] + weight < distances[neighbor]) {
atomicMin_double(&distances[neighbor], distances[v] + weight);
}
// __syncthreads();
}
}
}
int main(void) {
ifstream ifs("undirected.txt");
if (!ifs.is_open()) {
cerr << "Error: cannot open filen";
return -1;
}
// Assuming the graph is represented as an adjacency list
vector<int> adjacencyList(V + 1, 0);
vector<int> edges;
vector<double> weights;
int i, j;
double w;
while (ifs >> i >> j >> w) {
edges.push_back(j);
weights.push_back(w);
++adjacencyList[i];
edges.push_back(i);
weights.push_back(w);
++adjacencyList[j];
}
ifs.close();
// Calculate prefix sum to get adjacency list offsets
for (int i = 1; i <= V; ++i) {
adjacencyList[i] += adjacencyList[i - 1];
}
double *distances;
cudaMallocManaged(&distances, V + 1 * sizeof(double));
for(int i = 0; i < V + 1; i++) {
distances[i] = DBL_MAX;
}
int *d_adjacencyList, *d_edges;
double *d_weights;
cudaMalloc(&d_adjacencyList, (V + 1) * sizeof(int));
cudaMalloc(&d_edges, edges.size() * sizeof(int));
cudaMalloc(&d_weights, weights.size() * sizeof(double));
cudaMemcpy(d_adjacencyList, adjacencyList.data(), (V + 1) * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_edges, edges.data(), edges.size() * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_weights, weights.data(), weights.size() * sizeof(double), cudaMemcpyHostToDevice);
auto t1 = high_resolution_clock::now();
startDijkstra<<<(V + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>(4, distances, d_adjacencyList, d_edges, d_weights);
cudaDeviceSynchronize();
auto t2 = high_resolution_clock::now();
for (int i = 1; i <= V; ++i) {
if (distances[i] != DBL_MAX)
cout << i << 't' << distances[i] << endl;
}
duration<double, std::milli> ms_double = t2 - t1;
std::cout << ms_double.count() << "msn";
cudaFree(distances);
cudaFree(d_adjacencyList);
cudaFree(d_edges);
cudaFree(d_weights);
return 0;
}
Can anybody tell me what I did wrong?
New contributor
Doru is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.