@@ -592,7 +592,20 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
592
592
return bodyGenStatus;
593
593
}
594
594
595
+ namespace {
596
+ // / Contains the arguments for an LLVM store operation
597
+ struct DeferredStore {
598
+ DeferredStore (llvm::Value *value, llvm::Value *address)
599
+ : value(value), address(address) {}
600
+
601
+ llvm::Value *value;
602
+ llvm::Value *address;
603
+ };
604
+ } // namespace
605
+
595
606
// / Allocate space for privatized reduction variables.
607
+ // / `deferredStores` contains information to create store operations which need
608
+ // / to be inserted after all allocas.
596
609
template <typename T>
597
610
static LogicalResult
598
611
allocReductionVars (T loop, ArrayRef<BlockArgument> reductionArgs,
@@ -602,13 +615,13 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
602
615
SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
603
616
SmallVectorImpl<llvm::Value *> &privateReductionVariables,
604
617
DenseMap<Value, llvm::Value *> &reductionVariableMap,
618
+ SmallVectorImpl<DeferredStore> &deferredStores,
605
619
llvm::ArrayRef<bool > isByRefs) {
606
620
llvm::IRBuilderBase::InsertPointGuard guard (builder);
607
621
builder.SetInsertPoint (allocaIP.getBlock ()->getTerminator ());
608
622
609
623
// delay creating stores until after all allocas
610
- SmallVector<std::pair<llvm::Value *, llvm::Value *>> storesToCreate;
611
- storesToCreate.reserve (loop.getNumReductionVars ());
624
+ deferredStores.reserve (loop.getNumReductionVars ());
612
625
613
626
for (std::size_t i = 0 ; i < loop.getNumReductionVars (); ++i) {
614
627
Region &allocRegion = reductionDecls[i].getAllocRegion ();
@@ -628,7 +641,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
628
641
// variable allocated in the inlined region)
629
642
llvm::Value *var = builder.CreateAlloca (
630
643
moduleTranslation.convertType (reductionDecls[i].getType ()));
631
- storesToCreate .emplace_back (phis[0 ], var);
644
+ deferredStores .emplace_back (phis[0 ], var);
632
645
633
646
privateReductionVariables[i] = var;
634
647
moduleTranslation.mapValue (reductionArgs[i], phis[0 ]);
@@ -644,10 +657,6 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
644
657
}
645
658
}
646
659
647
- // TODO: further delay this so it doesn't come in the entry block at all
648
- for (auto [data, addr] : storesToCreate)
649
- builder.CreateStore (data, addr);
650
-
651
660
return success ();
652
661
}
653
662
@@ -819,12 +828,19 @@ static LogicalResult allocAndInitializeReductionVars(
819
828
if (op.getNumReductionVars () == 0 )
820
829
return success ();
821
830
831
+ SmallVector<DeferredStore> deferredStores;
832
+
822
833
if (failed (allocReductionVars (op, reductionArgs, builder, moduleTranslation,
823
834
allocaIP, reductionDecls,
824
835
privateReductionVariables, reductionVariableMap,
825
- isByRef)))
836
+ deferredStores, isByRef)))
826
837
return failure ();
827
838
839
+ // store result of the alloc region to the allocated pointer to the real
840
+ // reduction variable
841
+ for (auto [data, addr] : deferredStores)
842
+ builder.CreateStore (data, addr);
843
+
828
844
// Before the loop, store the initial values of reductions into reduction
829
845
// variables. Although this could be done after allocas, we don't want to mess
830
846
// up with the alloca insertion point.
@@ -1359,6 +1375,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1359
1375
collectReductionDecls (opInst, reductionDecls);
1360
1376
SmallVector<llvm::Value *> privateReductionVariables (
1361
1377
opInst.getNumReductionVars ());
1378
+ SmallVector<DeferredStore> deferredStores;
1362
1379
1363
1380
auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
1364
1381
// Allocate reduction vars
@@ -1373,10 +1390,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1373
1390
InsertPointTy (allocaIP.getBlock (),
1374
1391
allocaIP.getBlock ()->getTerminator ()->getIterator ());
1375
1392
1376
- if (failed (allocReductionVars (opInst, reductionArgs, builder,
1377
- moduleTranslation, allocaIP, reductionDecls ,
1378
- privateReductionVariables,
1379
- reductionVariableMap , isByRef)))
1393
+ if (failed (allocReductionVars (
1394
+ opInst, reductionArgs, builder, moduleTranslation, allocaIP,
1395
+ reductionDecls, privateReductionVariables, reductionVariableMap ,
1396
+ deferredStores , isByRef)))
1380
1397
bodyGenStatus = failure ();
1381
1398
1382
1399
// Initialize reduction vars
@@ -1401,6 +1418,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
1401
1418
1402
1419
builder.SetInsertPoint (initBlock->getFirstNonPHIOrDbgOrAlloca ());
1403
1420
1421
+ // insert stores deferred until after all allocas
1422
+ // these store the results of the alloc region into the allocation for the
1423
+ // pointer to the reduction variable
1424
+ for (auto [data, addr] : deferredStores)
1425
+ builder.CreateStore (data, addr);
1426
+
1404
1427
for (unsigned i = 0 ; i < opInst.getNumReductionVars (); ++i) {
1405
1428
SmallVector<llvm::Value *> phis;
1406
1429
0 commit comments