Skip to content

Commit f11b3de

Browse files
authored
[flang][cuda] Carry over the CUDA attribute in target rewrite (#136811)
1 parent 6c56160 commit f11b3de

File tree

2 files changed

+7
-2
lines changed

2 files changed

+7
-2
lines changed

flang/lib/Optimizer/CodeGen/TargetRewrite.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -531,6 +531,11 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
531531
if (callOp.getClusterSizeZ())
532532
newCall.getClusterSizeZMutable().assign(callOp.getClusterSizeZ());
533533
newCallResults.append(newCall.result_begin(), newCall.result_end());
534+
if (auto cudaProcAttr =
535+
callOp->template getAttrOfType<cuf::ProcAttributeAttr>(
536+
cuf::getProcAttrName())) {
537+
newCall->setAttr(cuf::getProcAttrName(), cudaProcAttr);
538+
}
534539
} else if constexpr (std::is_same_v<std::decay_t<A>, fir::CallOp>) {
535540
fir::CallOp newCall;
536541
if (callOp.getCallee()) {

flang/test/Fir/CUDA/cuda-target-rewrite.mlir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ gpu.module @testmod {
4545
func.func @main(%arg0: complex<f64>) {
4646
%0 = llvm.mlir.constant(0 : i64) : i64
4747
%1 = llvm.mlir.constant(0 : i32) : i32
48-
gpu.launch_func @testmod::@_QPtest blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %1 args(%arg0 : complex<f64>)
48+
gpu.launch_func @testmod::@_QPtest blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %1 args(%arg0 : complex<f64>) {cuf.proc_attr = #cuf.cuda_proc<global>}
4949
return
5050
}
5151

@@ -54,4 +54,4 @@ func.func @main(%arg0: complex<f64>) {
5454
// CHECK-LABEL: gpu.func @_QPtest
5555
// CHECK-SAME: (%arg0: f64, %arg1: f64) kernel {
5656
// CHECK: gpu.return
57-
// CHECK: gpu.launch_func @testmod::@_QPtest blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) : i64 dynamic_shared_memory_size %{{.*}} args(%{{.*}} : f64, %{{.*}} : f64)
57+
// CHECK: gpu.launch_func @testmod::@_QPtest blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) : i64 dynamic_shared_memory_size %{{.*}} args(%{{.*}} : f64, %{{.*}} : f64) {cuf.proc_attr = #cuf.cuda_proc<global>}

0 commit comments

Comments
 (0)