Skip to content

[flang][cuda] Convert cuf.sync_descriptor to runtime call #121524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 3, 2025

Conversation

clementval
Copy link
Contributor

Convert the op to a new entry point in the runtime CUFSyncGlobalDescriptor

@llvmbot llvmbot added flang:runtime flang Flang issues not falling into any other category flang:fir-hlfir labels Jan 2, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 2, 2025

@llvm/pr-subscribers-flang-fir-hlfir

@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Convert the op to a new entry point in the runtime CUFSyncGlobalDescriptor


Full diff: https://github.com/llvm/llvm-project/pull/121524.diff

4 Files Affected:

  • (modified) flang/include/flang/Runtime/CUDA/descriptor.h (+4)
  • (modified) flang/lib/Optimizer/Transforms/CUFOpConversion.cpp (+41-1)
  • (modified) flang/runtime/CUDA/descriptor.cpp (+7)
  • (added) flang/test/Fir/CUDA/cuda-sync-desc.mlir (+20)
diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h
index 55878aaac57fb3..0ee7feca10e44c 100644
--- a/flang/include/flang/Runtime/CUDA/descriptor.h
+++ b/flang/include/flang/Runtime/CUDA/descriptor.h
@@ -33,6 +33,10 @@ void *RTDECL(CUFGetDeviceAddress)(
 void RTDECL(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
     const char *sourceFile = nullptr, int sourceLine = 0);
 
+/// Get the device address of registered with the \p hostPtr and sync them.
+void RTDECL(CUFSyncGlobalDescriptor)(
+    void *hostPtr, const char *sourceFile = nullptr, int sourceLine = 0);
+
 } // extern "C"
 
 } // namespace Fortran::runtime::cuda
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index fb0ef246546444..f08f9e412b8857 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -788,6 +788,45 @@ struct CUFLaunchOpConversion
   const mlir::SymbolTable &symTab;
 };
 
+struct CUFSyncDescriptorOpConversion
+    : public mlir::OpRewritePattern<cuf::SyncDescriptorOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  CUFSyncDescriptorOpConversion(mlir::MLIRContext *context,
+                                const mlir::SymbolTable &symTab)
+      : OpRewritePattern(context), symTab{symTab} {}
+
+  mlir::LogicalResult
+  matchAndRewrite(cuf::SyncDescriptorOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    auto mod = op->getParentOfType<mlir::ModuleOp>();
+    fir::FirOpBuilder builder(rewriter, mod);
+    mlir::Location loc = op.getLoc();
+
+    auto globalOp = mod.lookupSymbol<fir::GlobalOp>(op.getGlobalName());
+    if (!globalOp)
+      return mlir::failure();
+
+    auto hostAddr = builder.create<fir::AddrOfOp>(
+        loc, fir::ReferenceType::get(globalOp.getType()), op.getGlobalName());
+    mlir::func::FuncOp callee =
+        fir::runtime::getRuntimeFunc<mkRTKey(CUFSyncGlobalDescriptor)>(loc,
+                                                                       builder);
+    auto fTy = callee.getFunctionType();
+    mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc);
+    mlir::Value sourceLine =
+        fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
+    llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+        builder, loc, fTy, hostAddr, sourceFile, sourceLine)};
+    builder.create<fir::CallOp>(loc, callee, args);
+    op.erase();
+    return mlir::success();
+  }
+
+private:
+  const mlir::SymbolTable &symTab;
+};
+
 class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
 public:
   void runOnOperation() override {
@@ -851,7 +890,8 @@ void cuf::populateCUFToFIRConversionPatterns(
                   CUFFreeOpConversion>(patterns.getContext());
   patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab,
                                                &dl, &converter);
-  patterns.insert<CUFLaunchOpConversion>(patterns.getContext(), symtab);
+  patterns.insert<CUFLaunchOpConversion, CUFSyncDescriptorOpConversion>(
+      patterns.getContext(), symtab);
 }
 
 void cuf::populateFIRCUFConversionPatterns(const mlir::SymbolTable &symtab,
diff --git a/flang/runtime/CUDA/descriptor.cpp b/flang/runtime/CUDA/descriptor.cpp
index 391c47e84241d4..947eeb66aa3d6c 100644
--- a/flang/runtime/CUDA/descriptor.cpp
+++ b/flang/runtime/CUDA/descriptor.cpp
@@ -46,6 +46,13 @@ void RTDEF(CUFDescriptorSync)(Descriptor *dst, const Descriptor *src,
       (void *)dst, (const void *)src, count, cudaMemcpyHostToDevice));
 }
 
+void RTDEF(CUFSyncGlobalDescriptor)(
+    void *hostPtr, const char *sourceFile, int sourceLine) {
+  void *devAddr{RTNAME(CUFGetDeviceAddress)(hostPtr, sourceFile, sourceLine)};
+  RTNAME(CUFDescriptorSync)
+  ((Descriptor *)devAddr, (Descriptor *)hostPtr, sourceFile, sourceLine);
+}
+
 RT_EXT_API_GROUP_END
 }
 } // namespace Fortran::runtime::cuda
diff --git a/flang/test/Fir/CUDA/cuda-sync-desc.mlir b/flang/test/Fir/CUDA/cuda-sync-desc.mlir
new file mode 100644
index 00000000000000..20b317f34a7f26
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-sync-desc.mlir
@@ -0,0 +1,20 @@
+// RUN: fir-opt --cuf-convert %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<i16 = dense<16> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 ([email protected]:clementval/llvm-project.git f37e52237791f58438790c77edeb8de08f692987)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  fir.global @_QMdevptrEdev_ptr {data_attr = #cuf.cuda<device>} : !fir.box<!fir.ptr<!fir.array<?xf32>>> {
+    %0 = fir.zero_bits !fir.ptr<!fir.array<?xf32>>
+    %c0 = arith.constant 0 : index
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) {allocator_idx = 2 : i32} : (!fir.ptr<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.ptr<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.ptr<!fir.array<?xf32>>>
+  }
+  func.func @_QQmain() {
+    cuf.sync_descriptor @_QMdevptrEdev_ptr
+    return
+  }
+}
+
+// CHECK-LABEL: func.func @_QQmain()
+// CHECK: %[[HOST_ADDR:.*]] = fir.address_of(@_QMdevptrEdev_ptr) : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+// CHECK: %[[HOST_ADDR_PTR:.*]] = fir.convert %[[HOST_ADDR]] : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.llvm_ptr<i8>
+// CHECK: fir.call @_FortranACUFSyncGlobalDescriptor(%[[HOST_ADDR_PTR]], %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32)

@clementval clementval changed the base branch from users/clementval/cuf_global_pointer to main January 3, 2025 01:02
@clementval clementval merged commit 6dcd2b0 into llvm:main Jan 3, 2025
12 checks passed
@clementval clementval deleted the cuf_global_pointer2 branch January 3, 2025 01:08
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 3, 2025

LLVM Buildbot has detected a new failure on builder ppc64le-flang-rhel-clang running on ppc64le-flang-rhel-test while building flang at step 5 "build-unified-tree".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/157/builds/16247

Here is the relevant piece of the build log for the reference
Step 5 (build-unified-tree) failure: build (failure)
...
64.956 [128/11/6546] Linking CXX shared module lib/BugpointPasses.so
65.218 [128/10/6547] Linking CXX executable bin/llvm-reduce
65.325 [128/9/6548] Linking CXX executable bin/llvm-opt-fuzzer
65.820 [128/8/6549] Linking CXX executable bin/clang-extdef-mapping
67.055 [128/7/6550] Linking CXX executable bin/clang-scan-deps
67.412 [128/6/6551] Linking CXX shared library lib/libclang.so.20.0.0git
67.474 [127/6/6552] Creating library symlink lib/libclang.so.20.0git lib/libclang.so
68.114 [125/7/6553] Linking CXX executable bin/c-arcmt-test
68.244 [125/6/6554] Linking CXX executable bin/clang-check
68.879 [125/5/6555] Building CXX object tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o
FAILED: tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o 
ccache /home/buildbots/llvm-external-buildbots/clang.16.0.1/bin/clang++ -DFLANG_INCLUDE_TESTS=1 -DFLANG_LITTLE_ENDIAN=1 -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/tools/flang/lib/Optimizer/Transforms -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/lib/Optimizer/Transforms -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/include -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/tools/flang/include -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/include -I/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/llvm/include -isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/llvm/../mlir/include -isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/tools/mlir/include -isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/tools/clang/include -isystem /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/llvm/../clang/include -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -Werror -Wno-deprecated-copy -Wno-string-conversion -Wno-ctad-maybe-unsupported -Wno-unused-command-line-argument -Wstring-conversion           -Wcovered-switch-default -Wno-nested-anon-types -O3 -DNDEBUG -std=c++17  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o -MF tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o.d -o tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o -c /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp:827:28: error: private field 'symTab' is not used [-Werror,-Wunused-private-field]
  const mlir::SymbolTable &symTab;
                           ^
1 error generated.
69.024 [125/4/6556] Linking CXX executable bin/mlir-cpu-runner
70.822 [125/3/6557] Linking CXX executable bin/c-index-test
70.825 [125/2/6558] Building CXX object lib/LTO/CMakeFiles/LLVMLTO.dir/LTO.cpp.o
177.679 [125/1/6559] Building CXX object tools/flang/lib/Optimizer/CodeGen/CMakeFiles/FIRCodeGen.dir/CodeGen.cpp.o
ninja: build stopped: subcommand failed.

clementval added a commit that referenced this pull request Jan 3, 2025
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 3, 2025

LLVM Buildbot has detected a new failure on builder ppc64-flang-aix running on ppc64-flang-aix-test while building flang at step 5 "build-unified-tree".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/201/builds/502

Here is the relevant piece of the build log for the reference
Step 5 (build-unified-tree) failure: build (failure)
...
-- Compiler-RT supported architectures: powerpc;powerpc64
-- check-shadowcallstack does nothing.
-- Configuring done (2.5s)
-- Generating done (0.2s)
-- Build files have been written to: /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/runtimes/runtimes-bins
12.927 [28/3/9] Performing build step for 'runtimes'
0.754 [0/1/1] Generating /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/compile_commands.json
13.829 [27/3/10] No install step for 'runtimes'
14.100 [26/3/12] Completed 'runtimes'
27.512 [26/2/13] Building CXX object tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o
FAILED: tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o 
/home/llvm/llvm-external-buildbots/clang.17.0.2/bin/clang++ -DFLANG_BIG_ENDIAN=1 -DFLANG_INCLUDE_TESTS=1 -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_LARGE_FILE_API -D_XOPEN_SOURCE=700 -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/tools/flang/lib/Optimizer/Transforms -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/flang/lib/Optimizer/Transforms -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/flang/include -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/tools/flang/include -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/include -I/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/llvm/include -isystem /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/llvm/../mlir/include -isystem /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/tools/mlir/include -isystem /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/build/tools/clang/include -isystem /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/llvm/../clang/include -mcmodel=large -fPIC -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -Werror -Wno-deprecated-copy -Wno-string-conversion -Wno-ctad-maybe-unsupported -Wno-unused-command-line-argument -Wstring-conversion           -Wcovered-switch-default -Wno-nested-anon-types -O3 -DNDEBUG -std=c++17  -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o -MF tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o.d -o tools/flang/lib/Optimizer/Transforms/CMakeFiles/FIRTransforms.dir/CUFOpConversion.cpp.o -c /home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
/home/llvm/llvm-external-buildbots/workers/ppc64-flang-aix-test/ppc64-flang-aix-build/llvm-project/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp:827:28: error: private field 'symTab' is not used [-Werror,-Wunused-private-field]
  827 |   const mlir::SymbolTable &symTab;
      |                            ^
1 error generated.
94.484 [26/1/14] Building CXX object tools/flang/lib/Optimizer/CodeGen/CMakeFiles/FIRCodeGen.dir/CodeGen.cpp.o
ninja: build stopped: subcommand failed.

github-actions bot pushed a commit to arm/arm-toolchain that referenced this pull request Jan 10, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:fir-hlfir flang:runtime flang Flang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants