Skip to content

[flang][OpenMP] Fix LASTPRIVATE for iteration variables #69773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 43 additions & 15 deletions flang/lib/Lower/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ class DataSharingProcessor {
bool hasLastPrivateOp;
mlir::OpBuilder::InsertPoint lastPrivIP;
mlir::OpBuilder::InsertPoint insPt;
mlir::Value loopIV;
// Symbols in private, firstprivate, and/or lastprivate clauses.
llvm::SetVector<const Fortran::semantics::Symbol *> privatizedSymbols;
llvm::SetVector<const Fortran::semantics::Symbol *> defaultSymbols;
Expand Down Expand Up @@ -157,6 +158,11 @@ class DataSharingProcessor {
// dealocation code as well.
void processStep1();
void processStep2(mlir::Operation *op, bool isLoop);

void setLoopIV(mlir::Value iv) {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}
};

void DataSharingProcessor::processStep1() {
Expand Down Expand Up @@ -270,7 +276,6 @@ void DataSharingProcessor ::insertBarrier() {
}

void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
mlir::arith::CmpIOp cmpOp;
bool cmpCreated = false;
mlir::OpBuilder::InsertPoint localInsPt = firOpBuilder.saveInsertionPoint();
for (const Fortran::parser::OmpClause &clause : opClauseList.v) {
Expand Down Expand Up @@ -349,34 +354,55 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) {
}
}
} else if (mlir::isa<mlir::omp::WsLoopOp>(op)) {
mlir::Operation *lastOper = op->getRegion(0).back().getTerminator();
firOpBuilder.setInsertionPoint(lastOper);

// Update the original variable just before exiting the worksharing
// loop. Conversion as follows:
//
// omp.wsloop {
// omp.wsloop { ...
// ... store
// store ===> %cmp = llvm.icmp "eq" %iv %ub
// omp.yield fir.if %cmp {
// } ^%lpv_update_blk:
// store ===> %v = arith.addi %iv, %step
// omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub
// } fir.if %cmp {
// fir.store %v to %loopIV
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: An additional fit.store here is redundant. We expect all last private operations to be abstracted within ^%lpv_update_blk

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fir.store here is needed. Considering the example of the previous conversation, loopIV is updated in the beginning of the loop body only, to have the same value as iv, but we need it to hold v instead (iv + step). Also, ^%lpv_update_blk copies loopIV to ia and not iv to loopIV. In the end this store gets optimized, because ^%lpv_update_blk generates a load loopIV right after it.

// ^%lpv_update_blk:
// }
// omp.yield
// }
//

// Only generate the compare once in presence of multiple LastPrivate
// clauses.
if (!cmpCreated) {
cmpOp = firOpBuilder.create<mlir::arith::CmpIOp>(
op->getLoc(), mlir::arith::CmpIPredicate::eq,
op->getRegion(0).front().getArguments()[0],
mlir::dyn_cast<mlir::omp::WsLoopOp>(op).getUpperBound()[0]);
}
auto ifOp =
firOpBuilder.create<fir::IfOp>(op->getLoc(), cmpOp, /*else*/ false);
if (cmpCreated)
continue;
cmpCreated = true;

mlir::Location loc = op->getLoc();
mlir::Operation *lastOper = op->getRegion(0).back().getTerminator();
firOpBuilder.setInsertionPoint(lastOper);

mlir::Value iv = op->getRegion(0).front().getArguments()[0];
mlir::Value ub =
mlir::dyn_cast<mlir::omp::WsLoopOp>(op).getUpperBound()[0];
mlir::Value step = mlir::dyn_cast<mlir::omp::WsLoopOp>(op).getStep()[0];

// v = iv + step
// cmp = step < 0 ? v < ub : v > ub
mlir::Value v = firOpBuilder.create<mlir::arith::AddIOp>(loc, iv, step);
mlir::Value zero =
firOpBuilder.createIntegerConstant(loc, step.getType(), 0);
mlir::Value negativeStep = firOpBuilder.create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::slt, step, zero);
mlir::Value vLT = firOpBuilder.create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::slt, v, ub);
mlir::Value vGT = firOpBuilder.create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::sgt, v, ub);
mlir::Value cmpOp = firOpBuilder.create<mlir::arith::SelectOp>(
loc, negativeStep, vLT, vGT);

auto ifOp = firOpBuilder.create<fir::IfOp>(loc, cmpOp, /*else*/ false);
firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
assert(loopIV && "loopIV was not set");
firOpBuilder.create<fir::StoreOp>(op->getLoc(), v, loopIV);
lastPrivIP = firOpBuilder.saveInsertionPoint();
} else {
TODO(converter.getCurrentLocation(),
Expand Down Expand Up @@ -2128,6 +2154,8 @@ static void createBodyOfOp(
proc.processStep1();
proc.processStep2(op, is_loop);
} else {
if (is_loop && args.size() > 0)
dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
dsp->processStep2(op, is_loop);
}

Expand Down
18 changes: 12 additions & 6 deletions flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@
!CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32
!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) {
!CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref<i32>
!CHECK: %[[val_11:.*]] = arith.cmpi eq, %[[arg]], %[[val_c100_i32]] : i32
!CHECK: fir.if %[[val_11]] {
!CHECK: %[[val_12:.*]] = fir.load %[[val_9]] : !fir.ref<f32>
!CHECK: fir.store %[[val_12]] to %[[val_5]] : !fir.ref<f32>
!CHECK: %[[val_13:.*]] = fir.load %[[val_10]] : !fir.ref<f32>
!CHECK: fir.store %[[val_13]] to %[[val_8]] : !fir.ref<f32>
!CHECK: %[[val_11:.*]] = arith.addi %[[arg]], %[[val_c1_i32_0]] : i32
!CHECK: %[[val_c0_i32:.*]] = arith.constant 0 : i32
!CHECK: %[[val_12:.*]] = arith.cmpi slt, %[[val_c1_i32_0]], %[[val_c0_i32]] : i32
!CHECK: %[[val_13:.*]] = arith.cmpi slt, %[[val_11]], %[[val_c100_i32]] : i32
!CHECK: %[[val_14:.*]] = arith.cmpi sgt, %[[val_11]], %[[val_c100_i32]] : i32
!CHECK: %[[val_15:.*]] = arith.select %[[val_12]], %[[val_13]], %[[val_14]] : i1
!CHECK: fir.if %[[val_15]] {
!CHECK: fir.store %[[val_11]] to %[[val_0]] : !fir.ref<i32>
!CHECK: %[[val_16:.*]] = fir.load %[[val_9]] : !fir.ref<f32>
!CHECK: fir.store %[[val_16]] to %[[val_5]] : !fir.ref<f32>
!CHECK: %[[val_17:.*]] = fir.load %[[val_10]] : !fir.ref<f32>
!CHECK: fir.store %[[val_17]] to %[[val_8]] : !fir.ref<f32>
!CHECK: }
!CHECK: omp.yield
!CHECK: }
Expand Down
60 changes: 48 additions & 12 deletions flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
! This test checks lowering of `FIRSTPRIVATE` clause for scalar types.
! This test checks lowering of `LASTPRIVATE` clause for scalar types.

! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s
! RUN: flang-new -fc1 -fopenmp -emit-fir %s -o - | FileCheck %s
Expand All @@ -24,8 +24,14 @@
!CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]])

! Testing last iteration check
!CHECK-NEXT: %[[IV_CMP:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
!CHECK: fir.if %[[IV_CMP]] {
!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref<i32>

! Testing lastprivate val update
!CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_REF]] : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
Expand All @@ -52,8 +58,14 @@ subroutine lastprivate_character(arg1)
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {

! Testing last iteration check
!CHECK-DAG: %[[IV_CMP:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-DAG: fir.if %[[IV_CMP]] {
!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
!CHECK: fir.if %[[IV_CMP]] {
!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref<i32>

! Testing lastprivate val update
!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE]] : !fir.ref<i32>
Expand Down Expand Up @@ -81,8 +93,14 @@ subroutine lastprivate_int(arg1)
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {

! Testing last iteration check
!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-NEXT: fir.if %[[IV_CMP1]] {
!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
!CHECK: fir.if %[[IV_CMP]] {
!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref<i32>
! Testing lastprivate val update
!CHECK-DAG: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
!CHECK-DAG: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref<i32>
Expand Down Expand Up @@ -112,8 +130,14 @@ subroutine mult_lastprivate_int(arg1, arg2)
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {

!Testing last iteration check
!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-NEXT: fir.if %[[IV_CMP1]] {
!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
!CHECK: fir.if %[[IV_CMP]] {
!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref<i32>
!Testing lastprivate val update
!CHECK-DAG: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
!CHECK-DAG: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref<i32>
Expand Down Expand Up @@ -148,8 +172,14 @@ subroutine mult_lastprivate_int2(arg1, arg2)
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {

! Testing last iteration check
!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-NEXT: fir.if %[[IV_CMP1]] {
!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
!CHECK: fir.if %[[IV_CMP]] {
!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref<i32>
! Testing lastprivate val update
!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE2]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG2]] : !fir.ref<i32>
Expand Down Expand Up @@ -179,8 +209,14 @@ subroutine firstpriv_lastpriv_int(arg1, arg2)
!CHECK-NEXT: omp.barrier
!CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} {
! Testing last iteration check
!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]]
!CHECK-NEXT: fir.if %[[IV_CMP1]] {
!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1
!CHECK: fir.if %[[IV_CMP]] {
!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref<i32>
! Testing lastprivate val update
!CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE1]] : !fir.ref<i32>
!CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref<i32>
Expand Down
8 changes: 7 additions & 1 deletion flang/test/Lower/OpenMP/lastprivate-commonblock.f90
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,14 @@
!CHECK: %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"}
!CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
!CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) {
!CHECK: %[[LAST_ITER:.*]] = arith.cmpi eq, %[[I]], %{{.*}} : i32
!CHECK: %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32
!CHECK: %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32
!CHECK: %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32
!CHECK: %[[LAST_ITER:.*]] = arith.select %[[NEG_STEP]], %[[V_LT]], %[[V_GT]] : i1
!CHECK: fir.if %[[LAST_ITER]] {
!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref<i32>
!CHECK: %[[PRIVATE_X_VAL:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref<f32>
!CHECK: hlfir.assign %[[PRIVATE_X_VAL]] to %[[X_DECL]]#0 temporary_lhs : f32, !fir.ref<f32>
!CHECK: %[[PRIVATE_Y_VAL:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref<f32>
Expand Down
66 changes: 66 additions & 0 deletions flang/test/Lower/OpenMP/lastprivate-iv.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
! Test LASTPRIVATE with iteration variable.
! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s

!CHECK-LABEL: func @_QPlastprivate_iv_inc
!CHECK: %[[I_MEM:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
!CHECK: %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_incEi"}
!CHECK: %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[LB:.*]] = arith.constant 4 : i32
!CHECK: %[[UB:.*]] = arith.constant 10 : i32
!CHECK: %[[STEP:.*]] = arith.constant 3 : i32
!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
!CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref<i32>
!CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[STEP_NEG:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] : i32
!CHECK: %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %[[UB]] : i32
!CHECK: %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %[[UB]] : i32
!CHECK: %[[CMP:.*]] = arith.select %[[STEP_NEG]], %[[V_LT]], %[[V_GT]] : i1
!CHECK: fir.if %[[CMP]] {
!CHECK: fir.store %[[V]] to %[[I]]#1 : !fir.ref<i32>
!CHECK: %[[I_VAL:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[I_VAL]] to %[[I2]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: }
!CHECK: omp.yield
!CHECK: }
subroutine lastprivate_iv_inc()
integer :: i

!$omp do lastprivate(i)
do i = 4, 10, 3
end do
!$omp end do
end subroutine

!CHECK-LABEL: func @_QPlastprivate_iv_dec
!CHECK: %[[I_MEM:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
!CHECK: %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_decEi"}
!CHECK: %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[LB:.*]] = arith.constant 10 : i32
!CHECK: %[[UB:.*]] = arith.constant 1 : i32
!CHECK: %[[STEP:.*]] = arith.constant -3 : i32
!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
!CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref<i32>
!CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32
!CHECK: %[[C0:.*]] = arith.constant 0 : i32
!CHECK: %[[STEP_NEG:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] : i32
!CHECK: %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %[[UB]] : i32
!CHECK: %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %[[UB]] : i32
!CHECK: %[[CMP:.*]] = arith.select %[[STEP_NEG]], %[[V_LT]], %[[V_GT]] : i1
!CHECK: fir.if %[[CMP]] {
!CHECK: fir.store %[[V]] to %[[I]]#1 : !fir.ref<i32>
!CHECK: %[[I_VAL:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
!CHECK: hlfir.assign %[[I_VAL]] to %[[I2]]#0 temporary_lhs : i32, !fir.ref<i32>
!CHECK: }
!CHECK: omp.yield
!CHECK: }
subroutine lastprivate_iv_dec()
integer :: i

!$omp do lastprivate(i)
do i = 10, 1, -3
end do
!$omp end do
end subroutine