Skip to content

Commit e295f5d

Browse files
authored
[OpenMP][IRBuilder] Don't initialize kmp_dep_info instances in alloc regions (#131795)
Fixes #121289 Given the following MLIR, where a variable: `x`, is `private` for the `omp.parallel` op and a `depend` for the `omp.task` op: ```mlir omp.private {type = private} @_QFEx_private_i32 : i32 llvm.func @nested_task_with_deps() { %0 = llvm.mlir.constant(1 : i64) : i64 %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr omp.parallel private(@_QFEx_private_i32 %1 -> %arg0 : !llvm.ptr) { omp.task depend(taskdependout -> %arg0 : !llvm.ptr) { omp.terminator } omp.terminator } llvm.return } ``` Before the fix proposed by this PR, the IR builder would emit the allocation and the initialzation logic for the task's depedency info struct in the parent function's alloc region: ```llvm define void @nested_task_with_deps() { .... %.dep.arr.addr = alloca [1 x %struct.kmp_dep_info], align 8 %2 = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %.dep.arr.addr, i64 0, i64 0 %3 = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %2, i32 0, i32 0 %4 = ptrtoint ptr %omp.private.alloc to i64 store i64 %4, ptr %3, align 4 .... br label %entry omp.par.entry: ; preds = %entry .... %omp.private.alloc = alloca i32, align 4 ``` Note the following: - The private value `x` is alloced where it should be in the parallel op's entry region, - howerver, since the privae value is also a depedency of the task and since allocation and initialzation of the task depedency info struct both happen in the alloc region, - this results in the private value being referenced before it is actually defined. This PR fixes the issue by only allocating the task depedency info in the alloc region while initialzation happens in the current IP of the function with the rest of the logic that depends on it.
1 parent b5ef33b commit e295f5d

File tree

2 files changed

+60
-11
lines changed

2 files changed

+60
-11
lines changed

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2056,6 +2056,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
20562056
Type *DepArrayTy = ArrayType::get(DependInfo, Dependencies.size());
20572057
DepArray = Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr");
20582058

2059+
Builder.restoreIP(OldIP);
2060+
20592061
unsigned P = 0;
20602062
for (const DependData &Dep : Dependencies) {
20612063
Value *Base =
@@ -2084,8 +2086,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
20842086
Flags);
20852087
++P;
20862088
}
2087-
2088-
Builder.restoreIP(OldIP);
20892089
}
20902090

20912091
// In the presence of the `if` clause, the following IR is generated:

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2651,6 +2651,16 @@ llvm.func @omp_task_attrs() -> () attributes {
26512651
// CHECK-LABEL: define void @omp_task_with_deps
26522652
// CHECK-SAME: (ptr %[[zaddr:.+]])
26532653
// CHECK: %[[dep_arr_addr:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
2654+
// CHECK: %[[DEP_ARR_ADDR1:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
2655+
// CHECK: %[[DEP_ARR_ADDR2:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
2656+
// CHECK: %[[DEP_ARR_ADDR3:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
2657+
// CHECK: %[[DEP_ARR_ADDR4:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
2658+
2659+
// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}})
2660+
// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc
2661+
// CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40,
2662+
// CHECK-SAME: i64 0, ptr @[[outlined_fn:.+]])
2663+
26542664
// CHECK: %[[dep_arr_addr_0:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[dep_arr_addr]], i64 0, i64 0
26552665
// CHECK: %[[dep_arr_addr_0_val:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[dep_arr_addr_0]], i32 0, i32 0
26562666
// CHECK: %[[dep_arr_addr_0_val_int:.+]] = ptrtoint ptr %0 to i64
@@ -2659,40 +2669,33 @@ llvm.func @omp_task_attrs() -> () attributes {
26592669
// CHECK: store i64 8, ptr %[[dep_arr_addr_0_size]], align 4
26602670
// CHECK: %[[dep_arr_addr_0_kind:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[dep_arr_addr_0]], i32 0, i32 2
26612671
// CHECK: store i8 1, ptr %[[dep_arr_addr_0_kind]], align 1
2672+
2673+
// CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]], {{.*}})
26622674
// -----
26632675
// dependence_type: Out
2664-
// CHECK: %[[DEP_ARR_ADDR1:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
26652676
// CHECK: %[[DEP_ARR_ADDR_1:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_ADDR1]], i64 0, i64 0
26662677
// [...]
26672678
// CHECK: %[[DEP_TYPE_1:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR_1]], i32 0, i32 2
26682679
// CHECK: store i8 3, ptr %[[DEP_TYPE_1]], align 1
26692680
// -----
26702681
// dependence_type: Inout
2671-
// CHECK: %[[DEP_ARR_ADDR2:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
26722682
// CHECK: %[[DEP_ARR_ADDR_2:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_ADDR2]], i64 0, i64 0
26732683
// [...]
26742684
// CHECK: %[[DEP_TYPE_2:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR_2]], i32 0, i32 2
26752685
// CHECK: store i8 3, ptr %[[DEP_TYPE_2]], align 1
26762686
// -----
26772687
// dependence_type: Mutexinoutset
2678-
// CHECK: %[[DEP_ARR_ADDR3:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
26792688
// CHECK: %[[DEP_ARR_ADDR_3:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_ADDR3]], i64 0, i64 0
26802689
// [...]
26812690
// CHECK: %[[DEP_TYPE_3:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR_3]], i32 0, i32 2
26822691
// CHECK: store i8 4, ptr %[[DEP_TYPE_3]], align 1
26832692
// -----
26842693
// dependence_type: Inoutset
2685-
// CHECK: %[[DEP_ARR_ADDR4:.+]] = alloca [1 x %struct.kmp_dep_info], align 8
26862694
// CHECK: %[[DEP_ARR_ADDR_4:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_ADDR4]], i64 0, i64 0
26872695
// [...]
26882696
// CHECK: %[[DEP_TYPE_4:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR_4]], i32 0, i32 2
26892697
// CHECK: store i8 8, ptr %[[DEP_TYPE_4]], align 1
26902698
llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) {
2691-
// CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}})
2692-
// CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc
2693-
// CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 40,
2694-
// CHECK-SAME: i64 0, ptr @[[outlined_fn:.+]])
2695-
// CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]], {{.*}})
26962699
omp.task depend(taskdependin -> %zaddr : !llvm.ptr) {
26972700
%n = llvm.mlir.constant(1 : i64) : i64
26982701
%valaddr = llvm.alloca %n x i32 : (i64) -> !llvm.ptr
@@ -3373,3 +3376,49 @@ llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
33733376
// CHECK: %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
33743377
// CHECK: %[[DIST_UB:.*]] = alloca i32
33753378
// CHECK: call void @__kmpc_dist_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 34, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[DIST_UB]], ptr %[[STRIDE]], i32 1, i32 0)
3379+
3380+
// -----
3381+
3382+
omp.private {type = private} @_QFEx_private_i32 : i32
3383+
llvm.func @nested_task_with_deps() {
3384+
%0 = llvm.mlir.constant(1 : i64) : i64
3385+
%1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr
3386+
%2 = llvm.mlir.constant(1 : i64) : i64
3387+
omp.parallel private(@_QFEx_private_i32 %1 -> %arg0 : !llvm.ptr) {
3388+
omp.task depend(taskdependout -> %arg0 : !llvm.ptr) {
3389+
omp.terminator
3390+
}
3391+
omp.terminator
3392+
}
3393+
llvm.return
3394+
}
3395+
3396+
// CHECK-LABEL: define void @nested_task_with_deps() {
3397+
// CHECK: %[[PAR_FORK_ARG:.*]] = alloca { ptr }, align 8
3398+
// CHECK: %[[DEP_ARR:.*]] = alloca [1 x %struct.kmp_dep_info], align 8
3399+
3400+
// CHECK: omp_parallel:
3401+
// CHECK-NEXT: %[[DEP_ARR_GEP:.*]] = getelementptr { ptr }, ptr %[[PAR_FORK_ARG]], i32 0, i32 0
3402+
// CHECK-NEXT: store ptr %[[DEP_ARR]], ptr %[[DEP_ARR_GEP]], align 8
3403+
// CHECK-NEXT: call void {{.*}} @__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @[[PAR_OUTLINED:.*]], ptr %[[PAR_FORK_ARG]])
3404+
// CHECK-NEXT: br label %[[PAR_EXIT:.*]]
3405+
3406+
// CHECK: [[PAR_EXIT]]:
3407+
// CHECK-NEXT: ret void
3408+
// CHECK: }
3409+
3410+
// CHECK: define internal void @[[PAR_OUTLINED]]{{.*}} {
3411+
// CHECK: omp.par.entry:
3412+
// CHECK: %[[DEP_ARR_GEP_2:.*]] = getelementptr { ptr }, ptr %{{.*}}, i32 0, i32 0
3413+
// CHECK: %[[DEP_ARR_2:.*]] = load ptr, ptr %[[DEP_ARR_GEP_2]], align 8
3414+
// CHECK: %[[PRIV_ALLOC:omp.private.alloc]] = alloca i32, align 4
3415+
3416+
// CHECK: %[[TASK:.*]] = call ptr @__kmpc_omp_task_alloc
3417+
// CHECK: %[[DEP_STRUCT_GEP:.*]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_2]], i64 0, i64 0
3418+
// CHECK: %[[DEP_GEP:.*]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_STRUCT_GEP]], i32 0, i32 0
3419+
// CHECK: %[[PRIV_ALLOC_TO_INT:.*]] = ptrtoint ptr %[[PRIV_ALLOC]] to i64
3420+
// CHECK: store i64 %[[PRIV_ALLOC_TO_INT]], ptr %[[DEP_GEP]], align 4
3421+
// CHECK: call i32 @__kmpc_omp_task_with_deps(ptr @{{.*}}, i32 %{{.*}}, ptr %{{.*}}, i32 1, ptr %[[DEP_ARR_2]], i32 0, ptr null)
3422+
3423+
// CHECK: ret void
3424+
// CHECK: }

0 commit comments

Comments
 (0)