[flang][cuda] Lower clock64 to nvvm intrinsic #127155

clementval · 2025-02-14T01:30:35Z

No description provided.

llvmbot · 2025-02-14T01:31:06Z

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタインクレメン) (clementval)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/127155.diff

4 Files Affected:

(modified) flang/include/flang/Optimizer/Builder/IntrinsicCall.h (+1)
(modified) flang/lib/Optimizer/Builder/IntrinsicCall.cpp (+11)
(modified) flang/module/cudadevice.f90 (+5)
(modified) flang/test/Lower/CUDA/cuda-device-proc.cuf (+5)

diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 47e8a77fa6aec..65732ce7f3224 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -231,6 +231,7 @@ struct IntrinsicLibrary {
   void genCFProcPointer(llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genCFunLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genCLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  mlir::Value genClock64(mlir::Type, llvm::ArrayRef<mlir::Value>);
   template <mlir::arith::CmpIPredicate pred>
   fir::ExtendedValue genCPtrCompare(mlir::Type,
                                     llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 3dc8d217ef38e..93744fa58ebc0 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -209,6 +209,7 @@ static constexpr IntrinsicHandler handlers[]{
      &I::genChdir,
      {{{"name", asAddr}, {"status", asAddr, handleDynamicOptional}}},
      /*isElemental=*/false},
+    {"clock64", &I::genClock64, {}, /*isElemental=*/false},
     {"cmplx",
      &I::genCmplx,
      {{{"x", asValue}, {"y", asValue, handleDynamicOptional}}}},
@@ -3228,6 +3229,16 @@ IntrinsicLibrary::genChdir(std::optional<mlir::Type> resultType,
   return {};
 }
 
+// CLOCK64
+mlir::Value IntrinsicLibrary::genClock64(mlir::Type resultType,
+                                         llvm::ArrayRef<mlir::Value> args) {
+  constexpr llvm::StringLiteral funcName = "llvm.nvvm.read.ptx.sreg.clock64";
+  mlir::MLIRContext *context = builder.getContext();
+  mlir::FunctionType ftype = mlir::FunctionType::get(context, {}, {resultType});
+  auto funcOp = builder.createFunction(loc, funcName, ftype);
+  return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
+}
+
 // CMPLX
 mlir::Value IntrinsicLibrary::genCmplx(mlir::Type resultType,
                                        llvm::ArrayRef<mlir::Value> args) {
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 45b9f2c838638..ed126a1253908 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -628,5 +628,10 @@ attributes(device) pure integer function atomicdeci(address, val)
   end interface
   public :: atomicdec
 
+  interface
+    attributes(device) integer(8) function clock64()
+    end function
+  end interface
+  public :: clock64
 
 end module
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 17a6a1d965640..6a5524102c0ea 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -9,6 +9,7 @@ attributes(global) subroutine devsub()
   real(8) :: ad
   integer(4) :: ai
   integer(8) :: al
+  integer(8) :: time
 
   call syncthreads()
   call syncwarp(1)
@@ -43,6 +44,8 @@ attributes(global) subroutine devsub()
   ai = atomicor(ai, 1_4)
   ai = atomicinc(ai, 1_4)
   ai = atomicdec(ai, 1_4)
+
+  time = clock64()
 end
 
 ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
@@ -79,6 +82,8 @@ end
 ! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap  %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
 ! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap  %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
 
+! CHECK: fir.call @llvm.nvvm.read.ptx.sreg.clock64()
+
 subroutine host1()
   integer, device :: a(32)
   integer, device :: ret

[flang][cuda] Lower clock64 to nvvm intrinsic

41d3531

clementval requested review from wangzpgi and Renaud-K on February 14, 2025 at 01:30

llvmbot added the "flang" (Flang issues not falling into any other category) and "flang:fir-hlfir" labels on Feb 14, 2025

wangzpgi approved these changes Feb 14, 2025

View reviewed changes

Renaud-K approved these changes Feb 14, 2025

View reviewed changes

clementval merged commit 910be4f into llvm:main Feb 14, 2025
11 checks passed

clementval deleted the cuf_clock64 branch February 14, 2025 02:59

joaosaffran pushed a commit to joaosaffran/llvm-project that referenced this pull request Feb 14, 2025

[flang][cuda] Lower clock64 to nvvm intrinsic (llvm#127155)

a65c355

sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request Feb 24, 2025

[flang][cuda] Lower clock64 to nvvm intrinsic (llvm#127155)

2afd0f3

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[flang][cuda] Lower clock64 to nvvm intrinsic #127155

[flang][cuda] Lower clock64 to nvvm intrinsic #127155

Uh oh!

clementval commented Feb 14, 2025

Uh oh!

llvmbot commented Feb 14, 2025

Uh oh!

Uh oh!

Uh oh!

[flang][cuda] Lower clock64 to nvvm intrinsic #127155

[flang][cuda] Lower clock64 to nvvm intrinsic #127155

Uh oh!

Conversation

clementval commented Feb 14, 2025

Uh oh!

llvmbot commented Feb 14, 2025

Uh oh!

Uh oh!

Uh oh!