@@ -3710,16 +3710,18 @@ class FirConverter : public Fortran::lower::AbstractConverter {
3710
3710
return false ;
3711
3711
}
3712
3712
3713
- static void genCUDADataTransfer (fir::FirOpBuilder &builder,
3714
- mlir::Location loc, bool lhsIsDevice,
3715
- hlfir::Entity &lhs, bool rhsIsDevice,
3716
- hlfir::Entity &rhs) {
3713
+ void genCUDADataTransfer (fir::FirOpBuilder &builder, mlir::Location loc,
3714
+ const Fortran::evaluate::Assignment &assign,
3715
+ hlfir::Entity &lhs, hlfir::Entity &rhs) {
3716
+ bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs (assign.lhs );
3717
+ bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs (assign.rhs );
3717
3718
if (rhs.isBoxAddressOrValue () || lhs.isBoxAddressOrValue ())
3718
3719
TODO (loc, " CUDA data transfler with descriptors" );
3720
+
3721
+ // device = host
3719
3722
if (lhsIsDevice && !rhsIsDevice) {
3720
3723
auto transferKindAttr = fir::CUDADataTransferKindAttr::get (
3721
3724
builder.getContext (), fir::CUDADataTransferKind::HostDevice);
3722
- // device = host
3723
3725
if (!rhs.isVariable ()) {
3724
3726
auto associate = hlfir::genAssociateExpr (
3725
3727
loc, builder, rhs, rhs.getType (), " .cuf_host_tmp" );
@@ -3732,7 +3734,73 @@ class FirConverter : public Fortran::lower::AbstractConverter {
3732
3734
}
3733
3735
return ;
3734
3736
}
3735
- TODO (loc, " Assignement with CUDA Fortran variables" );
3737
+
3738
+ // host = device
3739
+ if (!lhsIsDevice && rhsIsDevice) {
3740
+ auto transferKindAttr = fir::CUDADataTransferKindAttr::get (
3741
+ builder.getContext (), fir::CUDADataTransferKind::DeviceHost);
3742
+ if (!rhs.isVariable ()) {
3743
+ // evaluateRhs loads scalar. Look for the memory reference to be used in
3744
+ // the transfer.
3745
+ if (mlir::isa_and_nonnull<fir::LoadOp>(rhs.getDefiningOp ())) {
3746
+ auto loadOp = mlir::dyn_cast<fir::LoadOp>(rhs.getDefiningOp ());
3747
+ builder.create <fir::CUDADataTransferOp>(loc, loadOp.getMemref (), lhs,
3748
+ transferKindAttr);
3749
+ return ;
3750
+ }
3751
+ } else {
3752
+ builder.create <fir::CUDADataTransferOp>(loc, rhs, lhs,
3753
+ transferKindAttr);
3754
+ }
3755
+ return ;
3756
+ }
3757
+
3758
+ if (lhsIsDevice && rhsIsDevice) {
3759
+ assert (rhs.isVariable () && " CUDA Fortran assignment rhs is not legal" );
3760
+ auto transferKindAttr = fir::CUDADataTransferKindAttr::get (
3761
+ builder.getContext (), fir::CUDADataTransferKind::DeviceDevice);
3762
+ builder.create <fir::CUDADataTransferOp>(loc, rhs, lhs, transferKindAttr);
3763
+ return ;
3764
+ }
3765
+ llvm_unreachable (" Unhandled CUDA data transfer" );
3766
+ }
3767
+
3768
+ llvm::SmallVector<mlir::Value>
3769
+ genCUDAImplicitDataTransfer (fir::FirOpBuilder &builder, mlir::Location loc,
3770
+ const Fortran::evaluate::Assignment &assign) {
3771
+ llvm::SmallVector<mlir::Value> temps;
3772
+ localSymbols.pushScope ();
3773
+ auto transferKindAttr = fir::CUDADataTransferKindAttr::get (
3774
+ builder.getContext (), fir::CUDADataTransferKind::DeviceHost);
3775
+ unsigned nbDeviceResidentObject = 0 ;
3776
+ for (const Fortran::semantics::Symbol &sym :
3777
+ Fortran::evaluate::CollectSymbols (assign.rhs )) {
3778
+ if (const auto *details =
3779
+ sym.GetUltimate ()
3780
+ .detailsIf <Fortran::semantics::ObjectEntityDetails>()) {
3781
+ if (details->cudaDataAttr ()) {
3782
+ if (sym.owner ().IsDerivedType () && IsAllocatable (sym.GetUltimate ()))
3783
+ TODO (loc, " Device resident allocatable derived-type component" );
3784
+ // TODO: This should probably being checked in semantic and give a
3785
+ // proper error.
3786
+ assert (
3787
+ nbDeviceResidentObject <= 1 &&
3788
+ " Only one reference to the device resident object is supported" );
3789
+ auto addr = getSymbolAddress (sym);
3790
+ hlfir::Entity entity{addr};
3791
+ auto [temp, cleanup] =
3792
+ hlfir::createTempFromMold (loc, builder, entity);
3793
+ auto needCleanup = fir::getIntIfConstant (cleanup);
3794
+ if (needCleanup && *needCleanup)
3795
+ temps.push_back (temp);
3796
+ addSymbol (sym, temp, /* forced=*/ true );
3797
+ builder.create <fir::CUDADataTransferOp>(loc, addr, temp,
3798
+ transferKindAttr);
3799
+ ++nbDeviceResidentObject;
3800
+ }
3801
+ }
3802
+ }
3803
+ return temps;
3736
3804
}
3737
3805
3738
3806
void genDataAssignment (
@@ -3741,8 +3809,13 @@ class FirConverter : public Fortran::lower::AbstractConverter {
3741
3809
mlir::Location loc = getCurrentLocation ();
3742
3810
fir::FirOpBuilder &builder = getFirOpBuilder ();
3743
3811
3744
- bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs (assign.lhs );
3745
- bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs (assign.rhs );
3812
+ bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs (assign.lhs ) ||
3813
+ Fortran::evaluate::HasCUDAAttrs (assign.rhs );
3814
+ bool hasCUDAImplicitTransfer =
3815
+ Fortran::evaluate::HasCUDAImplicitTransfer (assign.rhs );
3816
+ llvm::SmallVector<mlir::Value> implicitTemps;
3817
+ if (hasCUDAImplicitTransfer)
3818
+ implicitTemps = genCUDAImplicitDataTransfer (builder, loc, assign);
3746
3819
3747
3820
// Gather some information about the assignment that will impact how it is
3748
3821
// lowered.
@@ -3800,12 +3873,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
3800
3873
Fortran::lower::StatementContext localStmtCtx;
3801
3874
hlfir::Entity rhs = evaluateRhs (localStmtCtx);
3802
3875
hlfir::Entity lhs = evaluateLhs (localStmtCtx);
3803
- if (lhsIsDevice || rhsIsDevice) {
3804
- genCUDADataTransfer (builder, loc, lhsIsDevice , lhs, rhsIsDevice , rhs);
3805
- } else {
3876
+ if (isCUDATransfer && !hasCUDAImplicitTransfer)
3877
+ genCUDADataTransfer (builder, loc, assign , lhs, rhs);
3878
+ else
3806
3879
builder.create <hlfir::AssignOp>(loc, rhs, lhs,
3807
3880
isWholeAllocatableAssignment,
3808
3881
keepLhsLengthInAllocatableAssignment);
3882
+ if (hasCUDAImplicitTransfer) {
3883
+ localSymbols.popScope ();
3884
+ for (mlir::Value temp : implicitTemps)
3885
+ builder.create <fir::FreeMemOp>(loc, temp);
3809
3886
}
3810
3887
return ;
3811
3888
}
0 commit comments