@@ -161,6 +161,11 @@ class DataSharingProcessor {
161
161
const Fortran::parser::OmpClauseList &opClauseList;
162
162
Fortran::lower::pft::Evaluation &eval;
163
163
164
+ bool useDelayedPrivatizationWhenPossible;
165
+ Fortran::lower::SymMap *symTable;
166
+ llvm::SetVector<mlir::SymbolRefAttr> privateInitializers;
167
+ llvm::SetVector<mlir::Value> privateSymHostAddrsses;
168
+
164
169
bool needBarrier ();
165
170
void collectSymbols (Fortran::semantics::Symbol::Flag flag);
166
171
void collectOmpObjectListSymbol (
@@ -182,10 +187,14 @@ class DataSharingProcessor {
182
187
public:
183
188
DataSharingProcessor (Fortran::lower::AbstractConverter &converter,
184
189
const Fortran::parser::OmpClauseList &opClauseList,
185
- Fortran::lower::pft::Evaluation &eval)
190
+ Fortran::lower::pft::Evaluation &eval,
191
+ bool useDelayedPrivatizationWhenPossible = false ,
192
+ Fortran::lower::SymMap *symTable = nullptr )
186
193
: hasLastPrivateOp(false ), converter(converter),
187
194
firOpBuilder (converter.getFirOpBuilder()), opClauseList(opClauseList),
188
- eval(eval) {}
195
+ eval(eval), useDelayedPrivatizationWhenPossible(
196
+ useDelayedPrivatizationWhenPossible),
197
+ symTable(symTable) {}
189
198
// Privatisation is split into two steps.
190
199
// Step1 performs cloning of all privatisation clauses and copying for
191
200
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -204,6 +213,14 @@ class DataSharingProcessor {
204
213
assert (!loopIV && " Loop iteration variable already set" );
205
214
loopIV = iv;
206
215
}
216
+
217
+ const llvm::SetVector<mlir::SymbolRefAttr> &getPrivateInitializers () const {
218
+ return privateInitializers;
219
+ };
220
+
221
+ const llvm::SetVector<mlir::Value> &getPrivateSymHostAddrsses () const {
222
+ return privateSymHostAddrsses;
223
+ }
207
224
};
208
225
209
226
void DataSharingProcessor::processStep1 () {
@@ -496,8 +513,46 @@ void DataSharingProcessor::privatize() {
496
513
copyFirstPrivateSymbol (&*mem);
497
514
}
498
515
} else {
499
- cloneSymbol (sym);
500
- copyFirstPrivateSymbol (sym);
516
+ if (useDelayedPrivatizationWhenPossible) {
517
+ auto ip = firOpBuilder.saveInsertionPoint ();
518
+
519
+ auto moduleOp = firOpBuilder.getInsertionBlock ()
520
+ ->getParentOp ()
521
+ ->getParentOfType <mlir::ModuleOp>();
522
+
523
+ firOpBuilder.setInsertionPoint (&moduleOp.getBodyRegion ().front (),
524
+ moduleOp.getBodyRegion ().front ().end ());
525
+
526
+ Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol (*sym);
527
+ assert (hsb && " Host symbol box not found" );
528
+
529
+ auto symType = hsb.getAddr ().getType ();
530
+ auto symLoc = hsb.getAddr ().getLoc ();
531
+ auto privatizerOp = firOpBuilder.create <mlir::omp::PrivateClauseOp>(
532
+ symLoc, symType, sym->name ().ToString ());
533
+ firOpBuilder.setInsertionPointToEnd (&privatizerOp.getBody ().front ());
534
+
535
+ symTable->pushScope ();
536
+ symTable->addSymbol (*sym, privatizerOp.getArgument (0 ));
537
+ symTable->pushScope ();
538
+
539
+ cloneSymbol (sym);
540
+ copyFirstPrivateSymbol (sym);
541
+
542
+ firOpBuilder.create <mlir::omp::YieldOp>(
543
+ hsb.getAddr ().getLoc (),
544
+ symTable->shallowLookupSymbol (*sym).getAddr ());
545
+
546
+ symTable->popScope ();
547
+ symTable->popScope ();
548
+ firOpBuilder.restoreInsertionPoint (ip);
549
+
550
+ privateInitializers.insert (mlir::SymbolRefAttr::get (privatizerOp));
551
+ privateSymHostAddrsses.insert (hsb.getAddr ());
552
+ } else {
553
+ cloneSymbol (sym);
554
+ copyFirstPrivateSymbol (sym);
555
+ }
501
556
}
502
557
}
503
558
}
@@ -2480,12 +2535,12 @@ static OpTy genOpWithBody(Fortran::lower::AbstractConverter &converter,
2480
2535
Fortran::lower::pft::Evaluation &eval, bool genNested,
2481
2536
mlir::Location currentLocation, bool outerCombined,
2482
2537
const Fortran::parser::OmpClauseList *clauseList,
2483
- Args &&...args) {
2538
+ DataSharingProcessor *dsp, Args &&...args) {
2484
2539
auto op = converter.getFirOpBuilder ().create <OpTy>(
2485
2540
currentLocation, std::forward<Args>(args)...);
2486
2541
createBodyOfOp<OpTy>(op, converter, currentLocation, eval, genNested,
2487
2542
clauseList,
2488
- /* args=*/ {}, outerCombined);
2543
+ /* args=*/ {}, outerCombined, dsp );
2489
2544
return op;
2490
2545
}
2491
2546
@@ -2497,21 +2552,25 @@ genMasterOp(Fortran::lower::AbstractConverter &converter,
2497
2552
currentLocation,
2498
2553
/* outerCombined=*/ false ,
2499
2554
/* clauseList=*/ nullptr ,
2555
+ /* dsp=*/ nullptr ,
2500
2556
/* resultTypes=*/ mlir::TypeRange ());
2501
2557
}
2502
2558
2503
2559
static mlir::omp::OrderedRegionOp
2504
2560
genOrderedRegionOp (Fortran::lower::AbstractConverter &converter,
2505
2561
Fortran::lower::pft::Evaluation &eval, bool genNested,
2506
2562
mlir::Location currentLocation) {
2507
- return genOpWithBody<mlir::omp::OrderedRegionOp>(
2508
- converter, eval, genNested, currentLocation,
2509
- /* outerCombined=*/ false ,
2510
- /* clauseList=*/ nullptr , /* simd=*/ false );
2563
+ return genOpWithBody<mlir::omp::OrderedRegionOp>(converter, eval, genNested,
2564
+ currentLocation,
2565
+ /* outerCombined=*/ false ,
2566
+ /* clauseList=*/ nullptr ,
2567
+ /* dsp=*/ nullptr ,
2568
+ /* simd=*/ false );
2511
2569
}
2512
2570
2513
2571
static mlir::omp::ParallelOp
2514
2572
genParallelOp (Fortran::lower::AbstractConverter &converter,
2573
+ Fortran::lower::SymMap &symTable,
2515
2574
Fortran::lower::pft::Evaluation &eval, bool genNested,
2516
2575
mlir::Location currentLocation,
2517
2576
const Fortran::parser::OmpClauseList &clauseList,
@@ -2533,16 +2592,37 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
2533
2592
if (!outerCombined)
2534
2593
cp.processReduction (currentLocation, reductionVars, reductionDeclSymbols);
2535
2594
2595
+ bool privatize = !outerCombined;
2596
+ DataSharingProcessor dsp (converter, clauseList, eval,
2597
+ /* useDelayedPrivatizationWhenPossible=*/ true ,
2598
+ &symTable);
2599
+
2600
+ if (privatize) {
2601
+ dsp.processStep1 ();
2602
+ }
2603
+
2604
+ llvm::SmallVector<mlir::Attribute> privateInits (
2605
+ dsp.getPrivateInitializers ().begin (), dsp.getPrivateInitializers ().end ());
2606
+
2607
+ llvm::SmallVector<mlir::Value> privateSymAddresses (
2608
+ dsp.getPrivateSymHostAddrsses ().begin (),
2609
+ dsp.getPrivateSymHostAddrsses ().end ());
2610
+
2536
2611
return genOpWithBody<mlir::omp::ParallelOp>(
2537
2612
converter, eval, genNested, currentLocation, outerCombined, &clauseList,
2613
+ &dsp,
2538
2614
/* resultTypes=*/ mlir::TypeRange (), ifClauseOperand,
2539
2615
numThreadsClauseOperand, allocateOperands, allocatorOperands,
2540
- reductionVars,
2616
+ reductionVars, privateSymAddresses,
2541
2617
reductionDeclSymbols.empty ()
2542
2618
? nullptr
2543
2619
: mlir::ArrayAttr::get (converter.getFirOpBuilder ().getContext (),
2544
2620
reductionDeclSymbols),
2545
- procBindKindAttr);
2621
+ procBindKindAttr,
2622
+ privateInits.empty ()
2623
+ ? nullptr
2624
+ : mlir::ArrayAttr::get (converter.getFirOpBuilder ().getContext (),
2625
+ privateInits));
2546
2626
}
2547
2627
2548
2628
static mlir::omp::SectionOp
@@ -2554,7 +2634,8 @@ genSectionOp(Fortran::lower::AbstractConverter &converter,
2554
2634
// all privatization is done within `omp.section` operations.
2555
2635
return genOpWithBody<mlir::omp::SectionOp>(
2556
2636
converter, eval, genNested, currentLocation,
2557
- /* outerCombined=*/ false , &sectionsClauseList);
2637
+ /* outerCombined=*/ false , &sectionsClauseList,
2638
+ /* dsp=*/ nullptr );
2558
2639
}
2559
2640
2560
2641
static mlir::omp::SingleOp
@@ -2575,8 +2656,8 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
2575
2656
2576
2657
return genOpWithBody<mlir::omp::SingleOp>(
2577
2658
converter, eval, genNested, currentLocation,
2578
- /* outerCombined=*/ false , &beginClauseList, allocateOperands ,
2579
- allocatorOperands, nowaitAttr);
2659
+ /* outerCombined=*/ false , &beginClauseList, /* dsp= */ nullptr ,
2660
+ allocateOperands, allocatorOperands, nowaitAttr);
2580
2661
}
2581
2662
2582
2663
static mlir::omp::TaskOp
@@ -2608,8 +2689,8 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
2608
2689
2609
2690
return genOpWithBody<mlir::omp::TaskOp>(
2610
2691
converter, eval, genNested, currentLocation,
2611
- /* outerCombined=*/ false , &clauseList, ifClauseOperand, finalClauseOperand ,
2612
- untiedAttr, mergeableAttr,
2692
+ /* outerCombined=*/ false , &clauseList, /* dsp= */ nullptr , ifClauseOperand ,
2693
+ finalClauseOperand, untiedAttr, mergeableAttr,
2613
2694
/* in_reduction_vars=*/ mlir::ValueRange (),
2614
2695
/* in_reductions=*/ nullptr , priorityClauseOperand,
2615
2696
dependTypeOperands.empty ()
@@ -2632,6 +2713,7 @@ genTaskGroupOp(Fortran::lower::AbstractConverter &converter,
2632
2713
return genOpWithBody<mlir::omp::TaskGroupOp>(
2633
2714
converter, eval, genNested, currentLocation,
2634
2715
/* outerCombined=*/ false , &clauseList,
2716
+ /* dsp=*/ nullptr ,
2635
2717
/* task_reduction_vars=*/ mlir::ValueRange (),
2636
2718
/* task_reductions=*/ nullptr , allocateOperands, allocatorOperands);
2637
2719
}
@@ -3015,6 +3097,7 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
3015
3097
3016
3098
return genOpWithBody<mlir::omp::TeamsOp>(
3017
3099
converter, eval, genNested, currentLocation, outerCombined, &clauseList,
3100
+ /* dsp=*/ nullptr ,
3018
3101
/* num_teams_lower=*/ nullptr , numTeamsClauseOperand, ifClauseOperand,
3019
3102
threadLimitClauseOperand, allocateOperands, allocatorOperands,
3020
3103
reductionVars,
@@ -3413,8 +3496,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
3413
3496
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
3414
3497
.test (ompDirective)) {
3415
3498
validDirective = true ;
3416
- genParallelOp (converter, eval, /* genNested=*/ false , currentLocation ,
3417
- loopOpClauseList,
3499
+ genParallelOp (converter, symTable, eval, /* genNested=*/ false ,
3500
+ currentLocation, loopOpClauseList,
3418
3501
/* outerCombined=*/ true );
3419
3502
}
3420
3503
}
@@ -3502,8 +3585,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3502
3585
genOrderedRegionOp (converter, eval, /* genNested=*/ true , currentLocation);
3503
3586
break ;
3504
3587
case llvm::omp::Directive::OMPD_parallel:
3505
- genParallelOp (converter, eval, /* genNested=*/ true , currentLocation ,
3506
- beginClauseList);
3588
+ genParallelOp (converter, symTable, eval, /* genNested=*/ true ,
3589
+ currentLocation, beginClauseList);
3507
3590
break ;
3508
3591
case llvm::omp::Directive::OMPD_single:
3509
3592
genSingleOp (converter, eval, /* genNested=*/ true , currentLocation,
@@ -3562,8 +3645,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3562
3645
.test (directive.v )) {
3563
3646
bool outerCombined =
3564
3647
directive.v != llvm::omp::Directive::OMPD_target_parallel;
3565
- genParallelOp (converter, eval, /* genNested=*/ false , currentLocation ,
3566
- beginClauseList, outerCombined);
3648
+ genParallelOp (converter, symTable, eval, /* genNested=*/ false ,
3649
+ currentLocation, beginClauseList, outerCombined);
3567
3650
combinedDirective = true ;
3568
3651
}
3569
3652
if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet)
@@ -3646,7 +3729,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3646
3729
3647
3730
// Parallel wrapper of PARALLEL SECTIONS construct
3648
3731
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
3649
- genParallelOp (converter, eval,
3732
+ genParallelOp (converter, symTable, eval,
3650
3733
/* genNested=*/ false , currentLocation, sectionsClauseList,
3651
3734
/* outerCombined=*/ true );
3652
3735
} else {
@@ -3663,6 +3746,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3663
3746
/* genNested=*/ false , currentLocation,
3664
3747
/* outerCombined=*/ false ,
3665
3748
/* clauseList=*/ nullptr ,
3749
+ /* dsp=*/ nullptr ,
3666
3750
/* reduction_vars=*/ mlir::ValueRange (),
3667
3751
/* reductions=*/ nullptr , allocateOperands,
3668
3752
allocatorOperands, nowaitClauseOperand);
0 commit comments