Skip to content

Commit 7c188ab

Browse files
authored
[mlir][LLVMIR][OpenMP] Move reduction allocas before stores (#105683)
Delay implicit reduction var stores until after all allocas. This fixes a couple of cases in existing unit tests. Follow up to #102524
1 parent c50d11e commit 7c188ab

File tree

3 files changed

+38
-15
lines changed

3 files changed

+38
-15
lines changed

flang/test/Lower/OpenMP/parallel-reduction-mixed.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ end subroutine proc
2626
!CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]]
2727
!CHECK: %[[F_priv:.*]] = alloca ptr
2828
!CHECK: %[[I_priv:.*]] = alloca i32
29-
!CHECK: store ptr %{{.*}}, ptr %[[F_priv]]
3029

3130
!CHECK: omp.reduction.init:
32-
!CHECK: store i32 0, ptr %[[I_priv]]
31+
!CHECK: store ptr %{{.*}}, ptr %[[F_priv]]
32+
!CHECK: store i32 0, ptr %[[I_priv]]
3333

3434
!CHECK: omp.par.region:
3535
!CHECK: br label %[[MALLOC_BB:.*]]

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,20 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
592592
return bodyGenStatus;
593593
}
594594

595+
namespace {
596+
/// Contains the arguments for an LLVM store operation
597+
struct DeferredStore {
598+
DeferredStore(llvm::Value *value, llvm::Value *address)
599+
: value(value), address(address) {}
600+
601+
llvm::Value *value;
602+
llvm::Value *address;
603+
};
604+
} // namespace
605+
595606
/// Allocate space for privatized reduction variables.
607+
/// `deferredStores` contains information to create store operations which needs
608+
/// to be inserted after all allocas
596609
template <typename T>
597610
static LogicalResult
598611
allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
@@ -602,13 +615,13 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
602615
SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
603616
SmallVectorImpl<llvm::Value *> &privateReductionVariables,
604617
DenseMap<Value, llvm::Value *> &reductionVariableMap,
618+
SmallVectorImpl<DeferredStore> &deferredStores,
605619
llvm::ArrayRef<bool> isByRefs) {
606620
llvm::IRBuilderBase::InsertPointGuard guard(builder);
607621
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
608622

609623
// delay creating stores until after all allocas
610-
SmallVector<std::pair<llvm::Value *, llvm::Value *>> storesToCreate;
611-
storesToCreate.reserve(loop.getNumReductionVars());
624+
deferredStores.reserve(loop.getNumReductionVars());
612625

613626
for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) {
614627
Region &allocRegion = reductionDecls[i].getAllocRegion();
@@ -628,7 +641,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
628641
// variable allocated in the inlined region)
629642
llvm::Value *var = builder.CreateAlloca(
630643
moduleTranslation.convertType(reductionDecls[i].getType()));
631-
storesToCreate.emplace_back(phis[0], var);
644+
deferredStores.emplace_back(phis[0], var);
632645

633646
privateReductionVariables[i] = var;
634647
moduleTranslation.mapValue(reductionArgs[i], phis[0]);
@@ -644,10 +657,6 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
644657
}
645658
}
646659

647-
// TODO: further delay this so it doesn't come in the entry block at all
648-
for (auto [data, addr] : storesToCreate)
649-
builder.CreateStore(data, addr);
650-
651660
return success();
652661
}
653662

@@ -819,12 +828,19 @@ static LogicalResult allocAndInitializeReductionVars(
819828
if (op.getNumReductionVars() == 0)
820829
return success();
821830

831+
SmallVector<DeferredStore> deferredStores;
832+
822833
if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
823834
allocaIP, reductionDecls,
824835
privateReductionVariables, reductionVariableMap,
825-
isByRef)))
836+
deferredStores, isByRef)))
826837
return failure();
827838

839+
// store result of the alloc region to the allocated pointer to the real
840+
// reduction variable
841+
for (auto [data, addr] : deferredStores)
842+
builder.CreateStore(data, addr);
843+
828844
// Before the loop, store the initial values of reductions into reduction
829845
// variables. Although this could be done after allocas, we don't want to mess
830846
// up with the alloca insertion point.
@@ -1359,6 +1375,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
13591375
collectReductionDecls(opInst, reductionDecls);
13601376
SmallVector<llvm::Value *> privateReductionVariables(
13611377
opInst.getNumReductionVars());
1378+
SmallVector<DeferredStore> deferredStores;
13621379

13631380
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
13641381
// Allocate reduction vars
@@ -1373,10 +1390,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
13731390
InsertPointTy(allocaIP.getBlock(),
13741391
allocaIP.getBlock()->getTerminator()->getIterator());
13751392

1376-
if (failed(allocReductionVars(opInst, reductionArgs, builder,
1377-
moduleTranslation, allocaIP, reductionDecls,
1378-
privateReductionVariables,
1379-
reductionVariableMap, isByRef)))
1393+
if (failed(allocReductionVars(
1394+
opInst, reductionArgs, builder, moduleTranslation, allocaIP,
1395+
reductionDecls, privateReductionVariables, reductionVariableMap,
1396+
deferredStores, isByRef)))
13801397
bodyGenStatus = failure();
13811398

13821399
// Initialize reduction vars
@@ -1401,6 +1418,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
14011418

14021419
builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
14031420

1421+
// insert stores deferred until after all allocas
1422+
// these store the results of the alloc region into the allocation for the
1423+
// pointer to the reduction variable
1424+
for (auto [data, addr] : deferredStores)
1425+
builder.CreateStore(data, addr);
1426+
14041427
for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
14051428
SmallVector<llvm::Value *> phis;
14061429

mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
8989
// CHECK: %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4
9090
// CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
9191
// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
92-
// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
9392
// CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8
9493
// CHECK: br label %[[VAL_15:.*]]
9594
// CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]]
@@ -98,6 +97,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute
9897
// CHECK: br label %[[VAL_18:.*]]
9998
// CHECK: omp.par.region1: ; preds = %[[VAL_17]]
10099
// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
100+
// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8
101101
// CHECK: br label %[[VAL_22:.*]]
102102
// CHECK: omp_section_loop.preheader: ; preds = %[[VAL_18]]
103103
// CHECK: store i32 0, ptr %[[VAL_7]], align 4

0 commit comments

Comments
 (0)