@@ -3810,12 +3810,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
3810
3810
mlir::Location loc = getCurrentLocation ();
3811
3811
fir::FirOpBuilder &builder = getFirOpBuilder ();
3812
3812
3813
+ bool isInDeviceContext =
3814
+ builder.getRegion ().getParentOfType <fir::CUDAKernelOp>();
3813
3815
bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs (assign.lhs ) ||
3814
3816
Fortran::evaluate::HasCUDAAttrs (assign.rhs );
3815
3817
bool hasCUDAImplicitTransfer =
3816
3818
Fortran::evaluate::HasCUDAImplicitTransfer (assign.rhs );
3817
3819
llvm::SmallVector<mlir::Value> implicitTemps;
3818
- if (hasCUDAImplicitTransfer)
3820
+ if (hasCUDAImplicitTransfer && !isInDeviceContext )
3819
3821
implicitTemps = genCUDAImplicitDataTransfer (builder, loc, assign);
3820
3822
3821
3823
// Gather some information about the assignment that will impact how it is
@@ -3874,13 +3876,13 @@ class FirConverter : public Fortran::lower::AbstractConverter {
3874
3876
Fortran::lower::StatementContext localStmtCtx;
3875
3877
hlfir::Entity rhs = evaluateRhs (localStmtCtx);
3876
3878
hlfir::Entity lhs = evaluateLhs (localStmtCtx);
3877
- if (isCUDATransfer && !hasCUDAImplicitTransfer)
3879
+ if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext )
3878
3880
genCUDADataTransfer (builder, loc, assign, lhs, rhs);
3879
3881
else
3880
3882
builder.create <hlfir::AssignOp>(loc, rhs, lhs,
3881
3883
isWholeAllocatableAssignment,
3882
3884
keepLhsLengthInAllocatableAssignment);
3883
- if (hasCUDAImplicitTransfer) {
3885
+ if (hasCUDAImplicitTransfer && !isInDeviceContext ) {
3884
3886
localSymbols.popScope ();
3885
3887
for (mlir::Value temp : implicitTemps)
3886
3888
builder.create <fir::FreeMemOp>(loc, temp);
0 commit comments