I have an assigment project for a job application which is mostly elevating my CUDA and C++ skills. I have experience with C++ but not with CUDA, so this is my first time and pardon me for any incovenient questions.
I have developed a Triathlon simulation using CUDA C++ and trying to calculate each athlete’s position using a kernel (global function) by calculating it each second on GPU. I tried to follow OOP principles and used encapsulation in Athlete class, so that used getters and setters.
However, when I try to build the project, I get the following error which occurs simply due to public functions used of Athlete class, inside the kernel.
Here is the error I get when trying to build my project:
Severity Code Description Project File Line Suppression State Details
Error MSB3721 The command ""C:Program FilesNVIDIA GPU Computing ToolkitCUDAv12.5binnvcc.exe" -gencode=arch=compute_52,code="sm_52,compute_52" --use-local-env -ccbin "C:Program FilesMicrosoft Visual Studio2022CommunityVCToolsMSVC14.40.33807binHostX64x64" -x cu -I"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv12.5include" -I"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv12.5include" -G --keep-dir Triathlonx64Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -D_DEBUG -D_CONSOLE -D"_UNICODE" -D"UNICODE" -Xcompiler "/EHsc /W3 /nologo /Od /FS /Zi /RTC1 /MDd " -Xcompiler "/FdTriathlonx64Debugvc143.pdb" -o C:UsersbuurasourcereposTriathlonTriathlonx64DebugRace.cu.obj "C:UsersbuurasourcereposTriathlonRace.cu"" exited with code 2. Triathlon C:Program FilesMicrosoft Visual Studio2022CommunityMSBuildMicrosoftVCv170BuildCustomizationsCUDA 12.5.targets 799
Here is the necessary code from the project :
#include "Race.cuh"
#include "Athlete.cuh"
#include <iostream>
#include <algorithm>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <thread>
#include <chrono>
#include <stdio.h>
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true) {
if (code != cudaSuccess) {
fprintf(stderr, "GPUassert: %s %s %dn", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
// CUDA kernel to update athlete positions
__global__ void updatePositions(Athlete* athletes, float raceTime) {
int segment_distances[3] = { 5000, 45000, 100000 }; // Swimming, Cycling, Running distances
int num_athletes = 900;
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
for (int i = idx; i < num_athletes; i += stride) {
if (!athletes[i].getRaceFinished()) {
// Update athlete's position
athletes[i].setPosition(athletes[i].getPosition() + athletes[i].getSpeed());
// Handle segment transitions
if (athletes[i].getSegment() == 0 && athletes[i].getPosition() >= segment_distances[0]) {
athletes[i].setSpeed(athletes[i].getSpeed() * 3);
athletes[i].setFinishTime(athletes[i].getFinishTime() + 10);
athletes[i].setSegment(1);
athletes[i].setPosition(segment_distances[0]); // Exact segment boundary
}
else if (athletes[i].getSegment() == 1 && athletes[i].getPosition() >= segment_distances[1]) {
athletes[i].setSpeed(athletes[i].getSpeed() / 3);
athletes[i].setFinishTime(athletes[i].getFinishTime() + 10);
athletes[i].setSegment(2);
athletes[i].setPosition(segment_distances[1]); // Exact segment boundary
}
else if (athletes[i].getSegment() == 2 && athletes[i].getPosition() >= segment_distances[2]) {
athletes[i].setFinishTime(athletes[i].getFinishTime() + raceTime);
athletes[i].setPosition(segment_distances[2]);
athletes[i].setRaceFinished(true);
}
}
}
}
Race::Race(int n, std::vector<std::vector<float>>& athlete_speeds) : num_teams(n), raceTime(0.0) {
for (int i = 0; i < num_teams; ++i) {
teams.emplace_back(i, athlete_speeds[i].data());
}
std::cout << "Race created." << std::endl;
std::cout << "Number of teams: " << teams.size() << std::endl;
}
std::vector<Team> Race::getTeams() {
return this->teams;
}
int Race::getNumberOfTeams() {
return this->num_teams;
}
void Race::setNumberOfTeams(int num_teams) {
this->num_teams = num_teams;
}
float Race::getRaceTime() {
return this->raceTime;
}
void Race::setRaceTime(float raceTime) {
this->raceTime = raceTime;
}
cudaError_t Race::startRace(const int team_index, const int athlete_index) {
std::cout << "Race started." << std::endl;
int num_athletes = num_teams * 3;
Athlete* athletes;
int segment_distances[3] = { 5000, 45000, 55000 }; // Swimming, Cycling, Running distances
cudaError_t cudaStatus;
bool firstAthlete = false;
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
goto Error;
}
// Allocate managed memory for athletes
gpuErrchk(cudaStatus = cudaMallocManaged(&athletes, num_athletes * sizeof(Athlete)));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMallocManaged failed!");
goto Error;
}
// Initialize athlete data
for (int i = 0; i < num_teams; ++i) {
for (int j = 0; j < 3; ++j) {
int idx = i * 3 + j;
athletes[idx].setTeamId(i);
athletes[idx].setSpeed(teams[i].getAthletes()[j].getSpeed());
athletes[idx].setPosition(teams[i].getAthletes()[j].getPosition()); // Start at the beginning
athletes[idx].setFinishTime(teams[i].getAthletes()[j].getFinishTime()); // Start time
athletes[idx].setSegment(teams[i].getAthletes()[j].getSegment()); // Start in swimming segment
}
}
while (true) {
int num_blocks = static_cast<int>((num_athletes + 255) / 256); // Ensure to make it integer
int blockSize = 256;
updatePositions<<<num_blocks, blockSize>>>(athletes, raceTime);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "updatePositions launch failed: %sn", cudaGetErrorString(cudaStatus));
goto Error;
}
// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching updatePositions!n", cudaStatus);
goto Error;
}
// Check for race completion
bool race_ongoing = false;
for (int i = 0; i < num_athletes; ++i) {
if (athletes[i].getPosition() < segment_distances[2]) {
race_ongoing = true;
break;
}
else {
if (firstAthlete == false) {
printAthleteResults(athletes);
firstAthlete = true;
}
}
}
// Sleep for a second to simulate real-time updates (if running on a system where this is feasible)
std::this_thread::sleep_for(std::chrono::seconds(1));
raceTime++;
// Print debugging information
int i = team_index * 3 + athlete_index;
printf("Athlete %d: Position = %f, Speed = %f, Time = %f, Segment = %dn", i, athletes[i].getPosition(), athletes[i].getSpeed(), raceTime, athletes[i].getSegment());
if (!race_ongoing) break;
}
// Print final results
printTeamResults(athletes);
Error:
// Free managed memory
gpuErrchk(cudaFree(athletes));
return cudaStatus;
}
void Race::printAthleteResults(Athlete* athletes) {
std::cout << "First Winner has finished the triathlon. Current results :n";
// Print individual athlete results
for (int i = 0; i < num_teams; ++i) {
for (int j = 0; j < 3; ++j) {
int idx = i * 3 + j;
std::cout << "Athlete" << j << " in team " << teams[i].getTeamId() << " - Position: " << athletes[idx].getPosition()
<< ", Speed: " << athletes[idx].getSpeed() << ", Time: " << athletes[idx].getFinishTime() << " secondsn";
}
}
}
void Race::printTeamResults(Athlete* athletes) {
// Calculate team rankings based on total time
std::vector<std::pair<int, float>> team_times;
for (int i = 0; i < num_teams; ++i) {
float total_time = 0.0f;
float average_time = 0;
for (int j = 0; j < 3; ++j) {
int idx = i * 3 + j;
total_time += athletes[idx].getFinishTime();
}
average_time = total_time / 3;
team_times.emplace_back(i, average_time);
}
// Sort teams by average time
std::sort(team_times.begin(), team_times.end(), [](const auto& left, const auto& right) {
return left.second < right.second;
});
// Print team rankings
std::cout << "Team Rankings:n";
for (size_t i = 0; i < team_times.size(); ++i) {
std::cout << "Rank " << (i + 1) << ": Team " << team_times[i].first
<< " Average Time: " << team_times[i].second << " secondsn";
}
}
What may cause the problem? In relation here is my Athlete.cuh file. ( .cu file is correct, I checked many times.)
#ifndef ATHLETE_CUH
#define ATHLETE_CUH
#include <cuda_runtime.h>
class Athlete {
private:
int id;
int team_id;
float position;
float speed;
int segment;
float finishTime;
bool race_finished;
public:
__host__ __device__ __forceinline__ int getId() const;
__host__ __device__ __forceinline__ void setId(int id);
__host__ __device__ __forceinline__ int getTeamId() const;
__host__ __device__ __forceinline__ void setTeamId(int team_id);
__host__ __device__ __forceinline__ float getPosition() const;
__host__ __device__ __forceinline__ void setPosition(float position);
__host__ __device__ __forceinline__ float getSpeed() const;
__host__ __device__ __forceinline__ void setSpeed(float speed);
__host__ __device__ __forceinline__ int getSegment() const;
__host__ __device__ __forceinline__ void setSegment(int segment);
__host__ __device__ __forceinline__ float getFinishTime() const;
__host__ __device__ __forceinline__ void setFinishTime(float finishTime);
__host__ __device__ __forceinline__ bool getRaceFinished() const;
__host__ __device__ __forceinline__ void setRaceFinished(bool race_finished);
// Athlete Constructor
__host__ __device__ Athlete(int id = 0, int team_id = 0, float initial_speed = 0.0f);
};
#endif // ATHLETE_CUH
Looking forward for your answers. THANK YOU!