Commit bda723f

[NFC][OpenMP][MLIR] Add MLIR test for lowering parallel if (#71788)
Add a test for the clause omp target parallel if(parallel : cond). The test checks that the corresponding MLIR construct is correctly lowered to LLVM IR.
1 parent: 521b468
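
For context, this exercises OpenMP's directive-name-modifier form of the if clause, which restricts the condition to the parallel part of a combined construct. A minimal C sketch of source that would produce an MLIR module like the one in this diff (hypothetical illustration only; the committed test is written directly in MLIR, and its fir.bindc_name attributes suggest a Flang/Fortran origin):

#include <stdint.h>

int32_t run(int32_t ifcond) {
  int32_t d = 0;
  /* if(parallel: ...) conditions only the parallel construct: when the
     expression evaluates to zero, the region executes on a single thread. */
  #pragma omp target parallel if(parallel: ifcond != 0) map(from: d)
  {
    d = 10; /* every thread stores the same value, matching the test body */
  }
  return d;
}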

1 file changed: 65 additions, 21 deletions

mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir

@@ -32,35 +32,79 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
     }
     llvm.return
   }
+
+  llvm.func @parallel_if(%arg0: !llvm.ptr {fir.bindc_name = "ifcond"}) {
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x i32 {bindc_name = "d"} : (i64) -> !llvm.ptr
+    %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
+    %3 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "ifcond"}
+    omp.target map_entries(%2 -> %arg1, %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+    ^bb0(%arg1: !llvm.ptr, %arg2: !llvm.ptr):
+      %4 = llvm.mlir.constant(10 : i32) : i32
+      %5 = llvm.load %arg2 : !llvm.ptr -> i32
+      %6 = llvm.mlir.constant(0 : i64) : i32
+      %7 = llvm.icmp "ne" %5, %6 : i32
+      omp.parallel if(%7 : i1) {
+        llvm.store %4, %arg1 : i32, !llvm.ptr
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
 }
 
-// CHECK: define weak_odr protected amdgpu_kernel void [[FUNC0:@.*]](
-// CHECK-SAME: ptr [[TMP:%.*]], ptr [[TMP0:.*]]) {
-// CHECK: [[TMP1:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
-// CHECK: [[STRUCTARG:%.*]] = alloca { ptr }, align 8, addrspace(5)
-// CHECK: [[STRUCTARG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRUCTARG]] to ptr
-// CHECK: [[TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[TMP3]] to ptr
-// CHECK: store ptr [[TMP0]], ptr [[TMP4]], align 8
-// CHECK: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) [[KERNEL_ENV:@.*]] to ptr), ptr [[TMP]])
-// CHECK: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
-// CHECK: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-// CHECK: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
-// CHECK: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
-// CHECK: [[GEP_:%.*]] = getelementptr { ptr }, ptr addrspace(5) [[STRUCTARG]], i32 0, i32 0
-// CHECK: store ptr [[TMP6]], ptr addrspace(5) [[GEP_]], align 8
-// CHECK: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
-// CHECK: store ptr [[STRUCTARG_ASCAST]], ptr [[TMP7]], align 8
-// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr [[FUNC1:@.*]], ptr null, ptr [[TMP2]], i64 1)
+// CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]](
+// CHECK-SAME: ptr %[[TMP:.*]], ptr %[[TMP0:.*]]) {
+// CHECK: %[[TMP1:.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr
+// CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5)
+// CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr
+// CHECK: %[[TMP3:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[TMP4:.*]] = addrspacecast ptr addrspace(5) %[[TMP3]] to ptr
+// CHECK: store ptr %[[TMP0]], ptr %[[TMP4]], align 8
+// CHECK: %[[TMP5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]])
+// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP5]], -1
+// CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8
+// CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CHECK: %[[GEP_:.*]] = getelementptr { ptr }, ptr addrspace(5) %[[STRUCTARG]], i32 0, i32 0
+// CHECK: store ptr %[[TMP6]], ptr addrspace(5) %[[GEP_]], align 8
+// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP2]], i64 0, i64 0
+// CHECK: store ptr %[[STRUCTARG_ASCAST]], ptr %[[TMP7]], align 8
+// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr null, ptr %[[TMP2]], i64 1)
 // CHECK: call void @__kmpc_target_deinit()
 
-// CHECK: define internal void [[FUNC1]](
-// CHECK-SAME: ptr noalias noundef [[TID_ADDR_ASCAST:%.*]], ptr noalias noundef [[ZERO_ADDR_ASCAST:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK: define internal void @[[FUNC1]](
+// CHECK-SAME: ptr noalias noundef {{.*}}, ptr noalias noundef {{.*}}, ptr {{.*}}) #{{[0-9]+}} {
+
+// Test if num_threads OpenMP clause for target region is correctly lowered
+// and passed as a param to kmpc_parallel_51 function
 
 // CHECK: define weak_odr protected amdgpu_kernel void [[FUNC_NUM_THREADS0:@.*]](
 // CHECK-NOT: call void @__kmpc_push_num_threads(
 // CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (
 // CHECK-SAME: ptr addrspace(1) @[[NUM_THREADS_GLOB:[0-9]+]] to ptr),
 // CHECK-SAME: i32 [[NUM_THREADS_TMP0:%.*]], i32 1, i32 156,
 // CHECK-SAME: i32 -1, ptr [[FUNC_NUM_THREADS1:@.*]], ptr null, ptr [[NUM_THREADS_TMP1:%.*]], i64 1)
+
+// One of the arguments of kmpc_parallel_51 function is responsible for handling if clause
+// of omp parallel construct for target region. If this argument is nonzero,
+// then kmpc_parallel_51 launches multiple threads for parallel region.
+//
+// This test checks if MLIR expression:
+// %7 = llvm.icmp "ne" %5, %6 : i32
+// omp.parallel if(%7 : i1)
+// is correctly lowered to LLVM IR code and the if condition variable
+// is passed as a param to kmpc_parallel_51 function
+
+// CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(
+// CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr %[[IFCOND_ARG2:.*]]) {
+// CHECK: store ptr %[[IFCOND_ARG2]], ptr %[[IFCOND_TMP1:.*]], align 8
+// CHECK: %[[IFCOND_TMP2:.*]] = load i32, ptr %[[IFCOND_TMP1]], align 4
+// CHECK: %[[IFCOND_TMP3:.*]] = icmp ne i32 %[[IFCOND_TMP2]], 0
+// CHECK: %[[IFCOND_TMP4:.*]] = sext i1 %[[IFCOND_TMP3]] to i32
+// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (
+// CHECK-SAME: ptr addrspace(1) {{.*}} to ptr),
+// CHECK-SAME: i32 {{.*}}, i32 %[[IFCOND_TMP4]], i32 -1,
+// CHECK-SAME: i32 -1, ptr {{.*}}, ptr null, ptr {{.*}}, i64 1)
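
For reference, the runtime entry point these CHECK lines call into is __kmpc_parallel_51 from LLVM's OpenMP device runtime. A C sketch of its shape (parameter names and exact pointer types are approximations, not taken from this commit) makes the argument positions in the calls above easier to read:

#include <stdint.h>

/* Approximate shape of the device runtime entry point. The third argument
   carries the evaluated "if" condition: nonzero forks a real parallel
   region, zero runs the region body on a single thread. */
void __kmpc_parallel_51(void *ident,         /* source location descriptor   */
                        int32_t global_tid,  /* calling thread's id          */
                        int32_t if_expr,     /* i32 1 above; %IFCOND_TMP4 in
                                                the if-clause test           */
                        int32_t num_threads, /* -1 if unspecified; i32 156 in
                                                the num_threads test         */
                        int32_t proc_bind,   /* -1 if unspecified            */
                        void *fn,            /* outlined parallel region     */
                        void *wrapper_fn,    /* ptr null in these calls      */
                        void **args,         /* captured-argument array      */
                        int64_t nargs);      /* i64 1: one captured pointer  */

Read this way, the new CHECK lines verify exactly one thing beyond the earlier tests: the sign-extended i1 comparison result (%[[IFCOND_TMP4]]) lands in the if_expr slot, where the unconditional parallel region passes the constant i32 1.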
