oneapi-src · bratpiorka · Sep 19, 2024 · Jul 29, 2024
@@ -22,6 +22,7 @@ jobs:
         compiler: [{c: gcc, cxx: g++}]
         shared_library: ['OFF']
         level_zero_provider: ['ON']
+        cuda_provider: ['ON']
         install_tbb: ['ON']
         disable_hwloc: ['OFF']
         link_hwloc_statically: ['OFF']
@@ -31,6 +32,7 @@ jobs:
             compiler: {c: gcc-7, cxx: g++-7}
             shared_library: 'OFF'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             install_tbb: 'ON'
             disable_hwloc: 'OFF'
             link_hwloc_statically: 'OFF'
@@ -39,6 +41,7 @@ jobs:
             compiler: {c: clang, cxx: clang++}
             shared_library: 'OFF'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             install_tbb: 'ON'
             disable_hwloc: 'OFF'
             link_hwloc_statically: 'OFF'
@@ -47,6 +50,7 @@ jobs:
             compiler: {c: gcc, cxx: g++}
             shared_library: 'ON'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             install_tbb: 'ON'
             disable_hwloc: 'OFF'
             link_hwloc_statically: 'OFF'
@@ -55,15 +59,17 @@ jobs:
             compiler: {c: gcc, cxx: g++}
             shared_library: 'ON'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             install_tbb: 'ON'
             disable_hwloc: 'OFF'
             link_hwloc_statically: 'OFF'
-          # test level_zero_provider='OFF'
+          # test level_zero_provider='OFF' and cuda_provider='OFF'
           - os: 'ubuntu-22.04'
             build_type: Release
             compiler: {c: gcc, cxx: g++}
             shared_library: 'OFF'
             level_zero_provider: 'OFF'
+            cuda_provider: 'OFF'
             install_tbb: 'ON'
             disable_hwloc: 'OFF'
             link_hwloc_statically: 'OFF'
@@ -73,6 +79,7 @@ jobs:
             compiler: {c: icx, cxx: icpx}
             shared_library: 'ON'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             install_tbb: 'ON'
             disable_hwloc: 'OFF'
             link_hwloc_statically: 'OFF'
@@ -82,6 +89,7 @@ jobs:
             compiler: {c: gcc, cxx: g++}
             shared_library: 'ON'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             install_tbb: 'OFF'
             disable_hwloc: 'OFF'
             link_hwloc_statically: 'OFF'
@@ -90,6 +98,7 @@ jobs:
             compiler: {c: gcc, cxx: g++}
             shared_library: 'ON'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             install_tbb: 'ON'
             disable_hwloc: 'ON'
             link_hwloc_statically: 'OFF'
@@ -98,6 +107,7 @@ jobs:
             compiler: {c: gcc, cxx: g++}
             shared_library: 'ON'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             install_tbb: 'ON'
             disable_hwloc: 'OFF'
             link_hwloc_statically: 'ON'
@@ -149,6 +159,7 @@ jobs:
         -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
         -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}}
+        -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}}
         -DUMF_FORMAT_CODE_STYLE=OFF
         -DUMF_DEVELOPER_MODE=ON
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
@@ -195,23 +206,27 @@ jobs:
         compiler: [{c: cl, cxx: cl}]
         shared_library: ['ON', 'OFF']
         level_zero_provider: ['ON']
+        cuda_provider: ['ON']
         include:
           - os: 'windows-2022'
             build_type: Release
             compiler: {c: clang-cl, cxx: clang-cl}
             shared_library: 'ON'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
             toolset: "-T ClangCL"
           - os: 'windows-2022'
             build_type: Release
             compiler: {c: cl, cxx: cl}
             shared_library: 'ON'
             level_zero_provider: 'ON'
+            cuda_provider: 'ON'
           - os: 'windows-2022'
             build_type: Release
             compiler: {c: cl, cxx: cl}
             shared_library: 'ON'
             level_zero_provider: 'OFF'
+            cuda_provider: 'OFF'
 
     runs-on: ${{matrix.os}}
 
@@ -247,6 +262,7 @@ jobs:
         -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}}
+        -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}}
         -DUMF_TESTS_FAIL_ON_SKIP=ON
 
     - name: Build UMF
@@ -305,6 +321,7 @@ jobs:
         -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
+        -DUMF_BUILD_CUDA_PROVIDER=ON
         -DUMF_TESTS_FAIL_ON_SKIP=ON
         -DUMF_LINK_HWLOC_STATICALLY=ON
 
@@ -347,6 +364,7 @@ jobs:
         -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
+        -DUMF_BUILD_CUDA_PROVIDER=ON
         -DUMF_TESTS_FAIL_ON_SKIP=ON
         -DUMF_LINK_HWLOC_STATICALLY=ON
 

@@ -63,6 +63,7 @@ jobs:
           -DUMF_FORMAT_CODE_STYLE=OFF
           -DUMF_DEVELOPER_MODE=OFF
           -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
+          -DUMF_BUILD_CUDA_PROVIDER=ON
           -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
           -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
 

@@ -84,6 +84,7 @@ jobs:
         -DUMF_DEVELOPER_MODE=ON
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
+        -DUMF_BUILD_CUDA_PROVIDER=ON
         -DUMF_TESTS_FAIL_ON_SKIP=ON
 
     - name: Build

@@ -106,6 +106,7 @@ jobs:
         -DUMF_BUILD_TESTS=${{matrix.build_tests}}
         -DUMF_BUILD_EXAMPLES=ON
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
+        -DUMF_BUILD_CUDA_PROVIDER=ON
         -DUMF_TESTS_FAIL_ON_SKIP=ON
         -DUMF_BUILD_SHARED_LIBRARY=ON
         ${{matrix.extra_build_options}}

@@ -1,6 +1,7 @@
 # This workflow builds and tests providers using GPU memory. It requires 
-# "level_zero" labeled self-hosted runners installed on systems with the 
-# appropriate GPU and drivers.
+# appropriately labelled self-hosted runners installed on systems with the 
+# correct GPU and drivers.
+
 name: GPU
 
 on: [workflow_call]
@@ -63,6 +64,7 @@ jobs:
           -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
           -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
           -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
+          -DUMF_BUILD_CUDA_PROVIDER=OFF
           -DUMF_TESTS_FAIL_ON_SKIP=ON
       
       - name: Configure build for Ubuntu
@@ -84,6 +86,7 @@ jobs:
           -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
           -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
           -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
+          -DUMF_BUILD_CUDA_PROVIDER=OFF
           -DUMF_TESTS_FAIL_ON_SKIP=ON
 
       - name: Build UMF
@@ -100,3 +103,66 @@ jobs:
       - name: Run benchmarks
         working-directory: ${{env.BUILD_DIR}}
         run: ctest --output-on-failure --test-dir benchmark -C ${{env.BUILD_TYPE}} --exclude-regex umf-bench-multithreaded
+
+  gpu-CUDA:
+    name: Build
+    env:
+      BUILD_TYPE: Release
+    # run only on upstream; forks will not have the HW
+    if: github.repository == 'oneapi-src/unified-memory-framework'
+    strategy:
+      matrix:
+        shared_library: ['ON', 'OFF']
+        # TODO add windows
+        os: ['Ubuntu']
+        include:
+        - os: 'Ubuntu'
+          compiler: {c: gcc, cxx: g++}
+          number_of_processors: '$(nproc)'
+
+    runs-on: ["DSS-CUDA", "DSS-${{matrix.os}}"]
+    steps:
+    - name: Checkout
+      uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        fetch-depth: 0
+
+    - name: Get information about platform
+      if: matrix.os == 'Ubuntu'
+      run: .github/scripts/get_system_info.sh
+
+    - name: Configure build for Ubuntu
+      if: matrix.os == 'Ubuntu'
+      run: >
+        cmake -B ${{env.BUILD_DIR}}
+        -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}"
+        -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
+        -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
+        -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
+        -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}}
+        -DUMF_BUILD_BENCHMARKS=ON
+        -DUMF_BUILD_TESTS=ON
+        -DUMF_BUILD_GPU_TESTS=ON
+        -DUMF_BUILD_GPU_EXAMPLES=ON
+        -DUMF_FORMAT_CODE_STYLE=OFF
+        -DUMF_DEVELOPER_MODE=ON
+        -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
+        -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
+        -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
+        -DUMF_BUILD_CUDA_PROVIDER=ON
+        -DUMF_TESTS_FAIL_ON_SKIP=ON
+
+    - name: Build UMF
+      run: cmake --build ${{env.BUILD_DIR}} --config ${{env.BUILD_TYPE}} -j ${{matrix.number_of_processors}}
+
+    - name: Run tests
+      working-directory: ${{env.BUILD_DIR}}
+      run: ctest -C ${{env.BUILD_TYPE}} --output-on-failure --test-dir test
+
+    - name: Run examples
+      working-directory: ${{env.BUILD_DIR}}
+      run: ctest --output-on-failure --test-dir examples -C ${{env.BUILD_TYPE}}
+
+    - name: Run benchmarks
+      working-directory: ${{env.BUILD_DIR}}
+      run: ctest --output-on-failure --test-dir benchmark -C ${{env.BUILD_TYPE}} --exclude-regex umf-bench-multithreaded
@@ -79,6 +79,7 @@ jobs:
         -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
+        -DUMF_BUILD_CUDA_PROVIDER=OFF
         -DUMF_USE_VALGRIND=1
         -DUMF_TESTS_FAIL_ON_SKIP=ON
 

@@ -37,6 +37,7 @@ jobs:
         -DUMF_FORMAT_CODE_STYLE=ON
         -DUMF_BUILD_TESTS=OFF
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
+        -DUMF_BUILD_CUDA_PROVIDER=OFF
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF
 
     - name: Check C/C++ formatting

@@ -55,6 +55,7 @@ jobs:
         -DCMAKE_C_COMPILER=${{matrix.compiler.c}}
         -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}}
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON
+        -DUMF_BUILD_CUDA_PROVIDER=ON
         -DUMF_FORMAT_CODE_STYLE=OFF
         -DUMF_DEVELOPER_MODE=ON
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
@@ -132,6 +133,7 @@ jobs:
         -DUMF_USE_ASAN=${{matrix.sanitizers.asan}}
         -DUMF_BUILD_EXAMPLES=ON
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
+        -DUMF_BUILD_CUDA_PROVIDER=OFF
         -DUMF_TESTS_FAIL_ON_SKIP=ON
 
     - name: Build UMF

@@ -35,6 +35,7 @@ jobs:
         -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON
         -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
         -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF
+        -DUMF_BUILD_CUDA_PROVIDER=OFF
         -DUMF_USE_VALGRIND=1
         -DUMF_TESTS_FAIL_ON_SKIP=ON
 

@@ -36,6 +36,7 @@ find_package(PkgConfig)
 # Build Options
 option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF)
 option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON)
+option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON)
 option(UMF_BUILD_LIBUMF_POOL_DISJOINT
        "Build the libumf_pool_disjoint static library" OFF)
 option(UMF_BUILD_LIBUMF_POOL_JEMALLOC
@@ -407,6 +408,18 @@ else()
     )
 endif()
 
+if((UMF_BUILD_GPU_TESTS OR UMF_BUILD_GPU_EXAMPLES) AND UMF_BUILD_CUDA_PROVIDER)
+    find_package(CUDA REQUIRED cuda)
+    if(CUDA_LIBRARIES)
+        set(UMF_CUDA_ENABLED TRUE)
+    else()
+        message(
+            STATUS "Disabling tests and examples that use the CUDA provider "
+                   "because the CUDA libraries they require were not found.")
+    endif()
+    # TODO do the same for ze_loader
+endif()
+
 # set optional symbols for map/def files
 #
 # TODO: ref. #649
@@ -417,6 +430,11 @@ if(UMF_BUILD_LEVEL_ZERO_PROVIDER)
     add_optional_symbol(umfLevelZeroMemoryProviderOps)
 endif()
 
+# Conditional configuration for CUDA provider
+if(UMF_BUILD_CUDA_PROVIDER)
+    add_optional_symbol(umfCUDAMemoryProviderOps)
+endif()
+
 if(NOT UMF_DISABLE_HWLOC)
     add_optional_symbol(umfOsMemoryProviderOps)
     if(LINUX)

@@ -19,10 +19,11 @@ The Unified Memory Framework (UMF) is a library for constructing allocators and
 For a quick introduction to UMF usage, please see
 [examples](https://oneapi-src.github.io/unified-memory-framework/examples.html)
 documentation, which includes the code of the
-[basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c)
-and the more advanced one that allocates
-[USM memory from the GPU device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/gpu_shared_memory.c)
-using the Level Zero API and UMF Level Zero memory provider.
+[basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c).
+There are also more advanced examples that allocate USM memory from the
+[Level Zero device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/level_zero_shared_memory/level_zero_shared_memory.c)
+using the Level Zero API and UMF Level Zero memory provider, and from the [CUDA device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/cuda_shared_memory/cuda_shared_memory.c)
+using the CUDA API and UMF CUDA memory provider.
 
 ## Build
 
@@ -101,6 +102,7 @@ List of options provided by CMake:
 | - | - | - | - |
 | UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF |
 | UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON |
+| UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON |
 | UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF |
 | UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF |
 | UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON |
@@ -203,6 +205,22 @@ with the `disable_provider_free` parameter set to true.
 1) Linux OS
 2) A length of a path of a file to be mapped can be `PATH_MAX` (4096) characters at most.
 
+#### CUDA memory provider
+
+A memory provider that provides memory from a CUDA device.
+
+##### Requirements
+
+1) Linux or Windows OS
+2) The `UMF_BUILD_CUDA_PROVIDER` option turned `ON` (the default)
+
+Additionally, the following are required for tests:
+
+3) The `UMF_BUILD_GPU_TESTS` option turned `ON`
+4) A system with a CUDA-compatible GPU
+5) Required packages:
+   - nvidia-cuda-dev (Linux) or cuda-sdk (Windows)
+
 ### Memory pool managers
 
 #### Proxy pool (part of libumf)