Skip to content

Commit a195e2d

Browse files
authored
[MLIR][OpenMP] Handle privatization for global values in MLIR->LLVM translation (#104407)
Potential fix for #102939 and #102949. The issues occurs because the CodeExtractor component only collect inputs (to the parallel regions) that are defined in the same function in which the parallel regions is present. Howerver, this is problematic because if we are privatizing a global value (e.g. a `target` variable which is emitted as a global), then we miss finding that input and we do not privatize the variable. This commit attempts to fix the issue by adding a flag to the CodeExtractor so that we can collect global inputs.
1 parent 46a4132 commit a195e2d

File tree

5 files changed

+65
-7
lines changed

5 files changed

+65
-7
lines changed

flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,5 @@ end program compilation_to_obj
5757
! LLVM: @[[GLOB_VAR:[^[:space:]]+]]t = internal global
5858

5959
! LLVM: define internal void @_QQmain..omp_par
60-
! LLVM: %[[LOCAL_VAR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
61-
! LLVM-NEXT: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8
62-
! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %[[LOCAL_VAR]], align 8
60+
! LLVM: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8
61+
! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %{{.*}}, align 8

llvm/include/llvm/Transforms/Utils/CodeExtractor.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,8 @@ class CodeExtractorAnalysisCache {
187187
/// sets, before extraction occurs. These modifications won't have any
188188
/// significant impact on the cost however.
189189
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
190-
const ValueSet &Allocas) const;
190+
const ValueSet &Allocas,
191+
bool CollectGlobalInputs = false) const;
191192

192193
/// Check if life time marker nodes can be hoisted/sunk into the outline
193194
/// region.

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1548,7 +1548,16 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
15481548
BasicBlock *CommonExit = nullptr;
15491549
SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
15501550
Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1551-
Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
1551+
1552+
Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1553+
/*CollectGlobalInputs=*/true);
1554+
1555+
Inputs.remove_if([&](Value *I) {
1556+
if (auto *GV = dyn_cast_if_present<GlobalVariable>(I))
1557+
return GV->getValueType() == OpenMPIRBuilder::Ident;
1558+
1559+
return false;
1560+
});
15521561

15531562
LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
15541563

llvm/lib/Transforms/Utils/CodeExtractor.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,14 +632,17 @@ bool CodeExtractor::isEligible() const {
632632
}
633633

634634
void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
635-
const ValueSet &SinkCands) const {
635+
const ValueSet &SinkCands,
636+
bool CollectGlobalInputs) const {
636637
for (BasicBlock *BB : Blocks) {
637638
// If a used value is defined outside the region, it's an input. If an
638639
// instruction is used outside the region, it's an output.
639640
for (Instruction &II : *BB) {
640641
for (auto &OI : II.operands()) {
641642
Value *V = OI;
642-
if (!SinkCands.count(V) && definedInCaller(Blocks, V))
643+
if (!SinkCands.count(V) &&
644+
(definedInCaller(Blocks, V) ||
645+
(CollectGlobalInputs && llvm::isa<llvm::GlobalVariable>(V))))
643646
Inputs.insert(V);
644647
}
645648

mlir/test/Target/LLVMIR/openmp-firstprivate.mlir

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,49 @@ llvm.func @foo()
156156
// CHECK: %[[STR_LEN:.*]] = extractvalue { ptr, i64 } %{{.*}}, 1
157157
// CHECK: %{{.*}} = alloca i8, i64 %[[STR_LEN]], align 1
158158
// CHECK: call void @foo()
159+
160+
// -----
161+
162+
// Verifies fix for https://github.com/llvm/llvm-project/issues/102939.
163+
//
164+
// The issues occurs because the CodeExtractor component only collect inputs
165+
// (to the parallel regions) that are defined in the same function in which the
166+
// parallel regions is present. Howerver, this is problematic because if we are
167+
// privatizing a global value (e.g. a `target` variable which is emitted as a
168+
// global), then we miss finding that input and we do not privatize the
169+
// variable.
170+
171+
omp.private {type = firstprivate} @global_privatizer : !llvm.ptr alloc {
172+
^bb0(%arg0: !llvm.ptr):
173+
%0 = llvm.mlir.constant(1 : i64) : i64
174+
%1 = llvm.alloca %0 x f32 {bindc_name = "global", pinned} : (i64) -> !llvm.ptr
175+
omp.yield(%1 : !llvm.ptr)
176+
} copy {
177+
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
178+
%0 = llvm.load %arg0 : !llvm.ptr -> f32
179+
llvm.store %0, %arg1 : f32, !llvm.ptr
180+
omp.yield(%arg1 : !llvm.ptr)
181+
}
182+
183+
llvm.func @global_accessor() {
184+
%global_addr = llvm.mlir.addressof @global : !llvm.ptr
185+
omp.parallel private(@global_privatizer %global_addr -> %arg0 : !llvm.ptr) {
186+
%1 = llvm.mlir.constant(3.140000e+00 : f32) : f32
187+
llvm.store %1, %arg0 : f32, !llvm.ptr
188+
omp.terminator
189+
}
190+
llvm.return
191+
}
192+
193+
llvm.mlir.global internal @global() {addr_space = 0 : i32} : f32 {
194+
%0 = llvm.mlir.zero : f32
195+
llvm.return %0 : f32
196+
}
197+
198+
// CHECK-LABEL: @global_accessor..omp_par({{.*}})
199+
// CHECK-NEXT: omp.par.entry:
200+
// Verify that we found the privatizer by checking that we properly inlined the
201+
// bodies of the alloc and copy regions.
202+
// CHECK: %[[PRIV_ALLOC:.*]] = alloca float, i64 1, align 4
203+
// CHECK: %[[GLOB_VAL:.*]] = load float, ptr @global, align 4
204+
// CHECK: store float %[[GLOB_VAL]], ptr %[[PRIV_ALLOC]], align 4

0 commit comments

Comments
 (0)