Thiết kế website giá rẻ

Question

My objective is to see whether I can use std::shared_ptr in C++ to pass resources to compute regions on the GPU using OpenACC.

The code snippet is as follows:

#include <cstdio>
#include <iostream>
#include <memory>
#include <vector>

#include <openacc.h>
/**
 * Test program: fills a std::vector<int> owned by a std::shared_ptr with the
 * values 0..9, doubles every element inside an OpenACC parallel loop, and
 * prints the resulting vector from the host.
 *
 * Inside the loop each iteration also prints the results of
 * acc_on_device(acc_device_host) and acc_on_device(acc_device_nvidia) so the
 * reader can see where the iteration reports it is running.
 *
 * @return 0 on completion.
 */
int main(){
    // Create a shared pointer to an STL vector of ints
    std::shared_ptr<std::vector<int>> vec_ptr = std::make_shared<std::vector<int>>();

    // Fill the vector with the ten values 0..9
    for (int i = 0; i < 10; ++i) {
        vec_ptr->push_back(i);
    }

    // NOTE(review): vec_ptr is the shared_ptr object, not a pointer to the
    // ints it manages, and the sections used in this file disagree with each
    // other ([0:1], [0:9], [0:999]) and with the ten elements stored above.
    // Presumably they are all meant to describe the same storage -- confirm
    // against the OpenACC data-clause rules before relying on them.
    #pragma acc enter data copyin(vec_ptr[0:1], vec_ptr[0:9])

    // Parallelize a loop over the vector using OpenACC
    #pragma acc data copyout(vec_ptr[0:9])
    {
      // The loop header is kept exactly as written (int index compared with
      // size()) so the compiler's loop analysis is not perturbed.
      #pragma acc parallel loop
      for (int i = 0; i < vec_ptr->size(); ++i) {
        // Report where this iteration claims to be executing. The format
        // string must end in "\n"; a bare "n" would run all ten messages
        // together on one line.
        printf(" The host, device flags are %d, %d \n", acc_on_device(acc_device_host), acc_on_device(acc_device_nvidia));
        // Each iteration touches only its own element, so iterations are independent
        (*vec_ptr)[i] *= 2;
      }
    }

    #pragma acc update self (vec_ptr[0:9])
    // Print the modified vector from the host; iterating by value avoids the
    // signed/unsigned index comparison against size().
    for (const int value : *vec_ptr) {
        std::cout << value << " ";
    }
    std::cout << std::endl;

    // NOTE(review): [0:999] exceeds the ten stored elements -- see the note
    // on the enter data directive above.
    #pragma acc exit data delete (vec_ptr[0:999], vec_ptr[0:1])
    return 0;
}

My CMake file looks like this:

# Build script for the testSmartPtrs OpenACC test executable.
cmake_minimum_required(VERSION 3.23)

# The compiler has to be selected before project() enables the CXX language;
# setting CMAKE_CXX_COMPILER afterwards is too late to influence compiler
# detection. (Passing -DCMAKE_CXX_COMPILER=nvc++ on the command line or using
# a toolchain file is the preferred alternative to hard-coding it here.)
set(CMAKE_CXX_COMPILER "nvc++")

project(
    testSmartPtrs
    VERSION 0.1
    LANGUAGES CXX
    )

# Place the built executable in <source dir>/app
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/app")

find_package(OpenACC REQUIRED)
find_package(CUDAToolkit REQUIRED)

# Extra nvc++ options appended to every C++ compile and link line.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -acc -gpu=cc70,managed -Minfo=all")

# Print what FindOpenACC detected and the final flag string, for debugging.
message(OPENACC_CXX_FLAGS= ${OpenACC_CXX_FLAGS})
message(OpenACC_CXX_VERSION_MAJOR= ${OpenACC_CXX_VERSION_MAJOR})
message(CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}")

 add_executable( testSmartPtrs )
# target_include_directories(testSmartPtrs PUBLIC "${PROJECT_SOURCE_DIR}/include")
# The former target_compile_options(... ${OPENACC_CXX}) call expanded an
# undefined variable and therefore added nothing; the OpenACC compile options
# reach the target through the imported OpenACC::OpenACC_CXX target below.
 target_link_libraries(testSmartPtrs PUBLIC OpenACC::OpenACC_CXX)
 target_sources(testSmartPtrs PRIVATE
                src/main.cpp
 )

My build looks as follows:

[simon@axbn build]$ make
Consolidate compiler generated dependencies of target testSmartPtrs
[ 50%] Building CXX object CMakeFiles/testSmartPtrs.dir/src/main.cpp.o
nvc++-Warning-CUDA_HOME has been deprecated. Please, use NVHPC_CUDA_HOME instead.
main:
     18, Generating enter data copyin(vec_ptr)
         Generating copyout(vec_ptr) [if not already present]
         Generating NVIDIA GPU code
         20, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
     26, Generating update self(vec_ptr)
     33, Generating exit data delete(vec_ptr)
std::__shared_ptr<std::vector<int, std::allocator<int>>, (__gnu_cxx::_Lock_policy)2>::operator*() const:
      2, include "memory"
          10, include "memory"
               82, include "shared_ptr.h"
                    52, include "shared_ptr_base.h"
                        906, Generating implicit acc routine seq
                             Generating acc routine seq
                             Generating NVIDIA GPU code
std::__shared_ptr<std::vector<int, std::allocator<int>>, (__gnu_cxx::_Lock_policy)2>::operator->() const:
      2, include "memory"
          10, include "memory"
               82, include "shared_ptr.h"
                    52, include "shared_ptr_base.h"
                        913, Generating implicit acc routine seq
                             Generating acc routine seq
                             Generating NVIDIA GPU code
std::vector<int, std::allocator<int>>::size() const:
      3, include "vector"
          64, include "stl_vector.h"
              646, Generating implicit acc routine seq
                   Generating acc routine seq
                   Generating NVIDIA GPU code
std::vector<int, std::allocator<int>>::operator[](unsigned long):
      3, include "vector"
          64, include "stl_vector.h"
              771, Generating implicit acc routine seq
                   Generating acc routine seq
                   Generating NVIDIA GPU code
[100%] Linking CXX executable ../app/testSmartPtrs
nvc++-Warning-CUDA_HOME has been deprecated. Please, use NVHPC_CUDA_HOME instead.
[100%] Built target testSmartPtrs

Expected result vs obtained result
I would expect printf() to print 0, 1 ten times (host flag 0, device flag 1), followed by the modified array {0 2 4 6 8 10 12 14 16 18}. However, what I get instead is that printf() prints 1, 0 ten times, followed by the modified array {0 2 4 6 8 10 12 14 16 18}. This suggests that the computation has happened only on the host and not on the device.

Any help or direction on why the code does not execute on the GPU is highly appreciated.

Thanks a ton,
Sangeeth

testSmartPtr.zip|attachment (2.8 KB)

Thiết kế website giá rẻ

Danh mục

C++ Smart Pointers and OpenACC