-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][cuda] Accept constant as src for cuf.data_tranfer #92951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Assignment of a constant to a device variable is a special case that can be further lowered to cudaMemsetor similar function. This patch update the lowering to avoid the creation of a temporary when we assign a constant to a device variable.
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesAssignment of a constant (host) to a device variable is a special case that can be further lowered to Full diff: https://github.com/llvm/llvm-project/pull/92951.diff 4 Files Affected:
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index f2992997c42cb..37b8da0181955 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -158,7 +158,7 @@ def cuf_DataTransferOp : cuf_Op<"data_transfer", []> {
updated.
}];
- let arguments = (ins Arg<AnyRefOrBoxType, "", [MemRead]>:$src,
+ let arguments = (ins Arg<AnyType, "", [MemRead]>:$src,
Arg<AnyRefOrBoxType, "", [MemWrite]>:$dst,
cuf_DataTransferKindAttr:$transfer_kind);
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 3e0a6da7fc327..898b37504a6e6 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -57,6 +57,7 @@
#include "flang/Semantics/symbol.h"
#include "flang/Semantics/tools.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Parser/Parser.h"
#include "mlir/Transforms/RegionUtils.h"
@@ -3798,11 +3799,17 @@ class FirConverter : public Fortran::lower::AbstractConverter {
auto transferKindAttr = cuf::DataTransferKindAttr::get(
builder.getContext(), cuf::DataTransferKind::HostDevice);
if (!rhs.isVariable()) {
- auto associate = hlfir::genAssociateExpr(
- loc, builder, rhs, rhs.getType(), ".cuf_host_tmp");
- builder.create<cuf::DataTransferOp>(loc, associate.getBase(), lhsVal,
- transferKindAttr);
- builder.create<hlfir::EndAssociateOp>(loc, associate);
+ // Special case if the rhs is a constant.
+ if (matchPattern(rhs.getDefiningOp(), mlir::m_Constant())) {
+ builder.create<cuf::DataTransferOp>(loc, rhs, lhsVal,
+ transferKindAttr);
+ } else {
+ auto associate = hlfir::genAssociateExpr(
+ loc, builder, rhs, rhs.getType(), ".cuf_host_tmp");
+ builder.create<cuf::DataTransferOp>(loc, associate.getBase(), lhsVal,
+ transferKindAttr);
+ builder.create<hlfir::EndAssociateOp>(loc, associate);
+ }
} else {
builder.create<cuf::DataTransferOp>(loc, rhsVal, lhsVal,
transferKindAttr);
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 0446c1db86b16..2c0c4c2cfae34 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -99,7 +99,12 @@ mlir::LogicalResult cuf::DataTransferOp::verify() {
if ((fir::isa_ref_type(srcTy) && fir::isa_ref_type(dstTy)) ||
(fir::isa_box_type(srcTy) && fir::isa_box_type(dstTy)))
return mlir::success();
- return emitOpError("expect src and dst to be both references or descriptors");
+ if (fir::isa_trivial(srcTy) &&
+ matchPattern(getSrc().getDefiningOp(), mlir::m_Constant()))
+ return mlir::success();
+ return emitOpError()
+ << "expect src and dst to be both references or descriptors or src to "
+ "be a constant";
}
//===----------------------------------------------------------------------===//
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index e23792e6efc55..42fa4d09c95e0 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -25,6 +25,8 @@ subroutine sub1()
adev = ahost + bhost
+ adev = 10
+
end
! CHECK-LABEL: func.func @_QPsub1()
@@ -41,10 +43,7 @@ end
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
-! CHECK: %[[C1:.*]] = arith.constant 1 : i32
-! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[C1]] {uniq_name = ".cuf_host_tmp"} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
-! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<i32>, !fir.ref<i32>
-! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<i32>, i1
+! CHECK: cuf.data_transfer %c1{{.*}} to %[[M]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<i32>
! CHECK: cuf.data_transfer %[[AHOST]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
@@ -62,6 +61,8 @@ end
! CHECK: cuf.data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref<!fir.array<10xi32>>, i1
+! CHECK: cuf.data_transfer %c10{{.*}} to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<!fir.array<10xi32>>
+
subroutine sub2()
integer, device :: m
integer, device :: adev(10), bdev(10)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Assignment of a constant (host) to a device variable is a special case that can be further lowered to
cudaMemset
or similar functions. This patch update the lowering to avoid the creation of a temporary when we assign a constant to a device variable.