@@ -32,35 +32,79 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
    }
    llvm.return
  }
+
+  llvm.func @parallel_if(%arg0: !llvm.ptr {fir.bindc_name = "ifcond"}) {
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x i32 {bindc_name = "d"} : (i64) -> !llvm.ptr
+    %2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
+    %3 = omp.map_info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "ifcond"}
+    omp.target map_entries(%2 -> %arg1, %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
+    ^bb0(%arg1: !llvm.ptr, %arg2: !llvm.ptr):
+      %4 = llvm.mlir.constant(10 : i32) : i32
+      %5 = llvm.load %arg2 : !llvm.ptr -> i32
+      %6 = llvm.mlir.constant(0 : i64) : i32
+      %7 = llvm.icmp "ne" %5, %6 : i32
+      omp.parallel if(%7 : i1) {
+        llvm.store %4, %arg1 : i32, !llvm.ptr
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
}
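
For orientation, the new @parallel_if test function corresponds roughly to the hedged C sketch below. The fir.bindc_name attributes suggest the MLIR was reduced from Flang-compiled Fortran, so the C form, names, and types here are illustrative assumptions, not the original source:

    /* Hypothetical source-level equivalent of @parallel_if above;
     * a sketch for orientation, not the original (likely Fortran) program. */
    void parallel_if(int *ifcond) {
      int d;
      /* d is mapped back from the device; *ifcond is implicitly mapped */
      #pragma omp target map(from: d)
      {
        /* runs multi-threaded only when the if clause evaluates nonzero */
        #pragma omp parallel if(*ifcond != 0)
        {
          d = 10;
        }
      }
    }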
- // CHECK: define weak_odr protected amdgpu_kernel void [[FUNC0:@.*]](
- // CHECK-SAME: ptr [[TMP:%.*]], ptr [[TMP0:.*]]) {
- // CHECK: [[TMP1:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
- // CHECK: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[TMP1]] to ptr
- // CHECK: [[STRUCTARG:%.*]] = alloca { ptr }, align 8, addrspace(5)
- // CHECK: [[STRUCTARG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STRUCTARG]] to ptr
- // CHECK: [[TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
- // CHECK: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[TMP3]] to ptr
- // CHECK: store ptr [[TMP0]], ptr [[TMP4]], align 8
- // CHECK: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) [[KERNEL_ENV:@.*]] to ptr), ptr [[TMP]])
- // CHECK: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
- // CHECK: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
- // CHECK: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
- // CHECK: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
- // CHECK: [[GEP_:%.*]] = getelementptr { ptr }, ptr addrspace(5) [[STRUCTARG]], i32 0, i32 0
- // CHECK: store ptr [[TMP6]], ptr addrspace(5) [[GEP_]], align 8
- // CHECK: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0
- // CHECK: store ptr [[STRUCTARG_ASCAST]], ptr [[TMP7]], align 8
- // CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr [[FUNC1:@.*]], ptr null, ptr [[TMP2]], i64 1)
+ // CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]](
+ // CHECK-SAME: ptr %[[TMP:.*]], ptr %[[TMP0:.*]]) {
+ // CHECK: %[[TMP1:.*]] = alloca [1 x ptr], align 8, addrspace(5)
+ // CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr
+ // CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5)
+ // CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr
+ // CHECK: %[[TMP3:.*]] = alloca ptr, align 8, addrspace(5)
+ // CHECK: %[[TMP4:.*]] = addrspacecast ptr addrspace(5) %[[TMP3]] to ptr
+ // CHECK: store ptr %[[TMP0]], ptr %[[TMP4]], align 8
+ // CHECK: %[[TMP5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]])
+ // CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP5]], -1
+ // CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+ // CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8
+ // CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+ // CHECK: %[[GEP_:.*]] = getelementptr { ptr }, ptr addrspace(5) %[[STRUCTARG]], i32 0, i32 0
+ // CHECK: store ptr %[[TMP6]], ptr addrspace(5) %[[GEP_]], align 8
+ // CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP2]], i64 0, i64 0
+ // CHECK: store ptr %[[STRUCTARG_ASCAST]], ptr %[[TMP7]], align 8
+ // CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr null, ptr %[[TMP2]], i64 1)
// CHECK: call void @__kmpc_target_deinit()
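
The block of CHECK lines above pins down the standard OpenMP device kernel skeleton. As a hedged C-like paraphrase (all names below are stand-ins, not the generated symbols or exact runtime declarations):

    #include <stddef.h>
    #include <stdint.h>

    /* Assumed stand-ins for runtime entry points and generated globals. */
    extern int32_t __kmpc_target_init(void *kernel_env, void *dyn_env);
    extern void    __kmpc_target_deinit(void);
    extern int32_t __kmpc_global_thread_num(void *ident);
    extern void    __kmpc_parallel_51(void *ident, int32_t gtid, int32_t if_expr,
                                      int32_t num_threads, int32_t proc_bind,
                                      void *fn, void *wrapper_fn,
                                      void **args, int64_t nargs);
    extern void *kernel_env, *ident, *outlined_fn;

    void kernel(void *dyn_env, void *d_ptr) {
      struct { void *d; } structarg;   /* the `{ ptr }` alloca             */
      void *capture_args[1];           /* the `[1 x ptr]` alloca           */
      if (__kmpc_target_init(kernel_env, dyn_env) != -1)
        return;                        /* non-main threads skip user code  */
      int32_t gtid = __kmpc_global_thread_num(ident);
      structarg.d = d_ptr;             /* pack the single capture          */
      capture_args[0] = &structarg;
      __kmpc_parallel_51(ident, gtid, /*if_expr=*/1, /*num_threads=*/-1,
                         /*proc_bind=*/-1, outlined_fn, /*wrapper_fn=*/NULL,
                         capture_args, /*nargs=*/1);
      __kmpc_target_deinit();
    }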
- // CHECK: define internal void [[FUNC1]](
- // CHECK-SAME: ptr noalias noundef [[TID_ADDR_ASCAST:%.*]], ptr noalias noundef [[ZERO_ADDR_ASCAST:%.*]], ptr [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+ // CHECK: define internal void @[[FUNC1]](
+ // CHECK-SAME: ptr noalias noundef {{.*}}, ptr noalias noundef {{.*}}, ptr {{.*}}) #{{[0-9]+}} {
+
+ // Test that the num_threads OpenMP clause for the target region is correctly
+ // lowered and passed as a parameter to the __kmpc_parallel_51 function.
// CHECK: define weak_odr protected amdgpu_kernel void [[FUNC_NUM_THREADS0:@.*]](
// CHECK-NOT: call void @__kmpc_push_num_threads(
// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (
// CHECK-SAME: ptr addrspace(1) @[[NUM_THREADS_GLOB:[0-9]+]] to ptr),
// CHECK-SAME: i32 [[NUM_THREADS_TMP0:%.*]], i32 1, i32 156,
// CHECK-SAME: i32 -1, ptr [[FUNC_NUM_THREADS1:@.*]], ptr null, ptr [[NUM_THREADS_TMP1:%.*]], i64 1)
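
The i32 156 in the call above is the num_threads argument of __kmpc_parallel_51, and the CHECK-NOT verifies it is passed there directly rather than through a separate __kmpc_push_num_threads call. A source construct along the lines of this hedged sketch would produce it (the function name and the use of C are assumptions; 156 is taken from the CHECK lines):

    /* Hypothetical source shape behind the num_threads checks above. */
    void parallel_num_threads(void) {
      int d;
      #pragma omp target map(from: d)
      {
        /* 156 flows into the num_threads parameter of __kmpc_parallel_51 */
        #pragma omp parallel num_threads(156)
        {
          d = 1;
        }
      }
    }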
+
+ // One of the arguments of the __kmpc_parallel_51 function is responsible for
+ // handling the if clause of the omp parallel construct for the target region.
+ // If this argument is nonzero, __kmpc_parallel_51 launches multiple threads
+ // for the parallel region.
+ //
+ // This test checks that the MLIR expressions:
+ //    %7 = llvm.icmp "ne" %5, %6 : i32
+ //    omp.parallel if(%7 : i1)
+ // are correctly lowered to LLVM IR and that the if-condition variable is
+ // passed as a parameter to the __kmpc_parallel_51 function.
+
+ // CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(
+ // CHECK-SAME: ptr {{.*}}, ptr {{.*}}, ptr %[[IFCOND_ARG2:.*]]) {
+ // CHECK: store ptr %[[IFCOND_ARG2]], ptr %[[IFCOND_TMP1:.*]], align 8
+ // CHECK: %[[IFCOND_TMP2:.*]] = load i32, ptr %[[IFCOND_TMP1]], align 4
+ // CHECK: %[[IFCOND_TMP3:.*]] = icmp ne i32 %[[IFCOND_TMP2]], 0
+ // CHECK: %[[IFCOND_TMP4:.*]] = sext i1 %[[IFCOND_TMP3]] to i32
+ // CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (
+ // CHECK-SAME: ptr addrspace(1) {{.*}} to ptr),
+ // CHECK-SAME: i32 {{.*}}, i32 %[[IFCOND_TMP4]], i32 -1,
+ // CHECK-SAME: i32 -1, ptr {{.*}}, ptr null, ptr {{.*}}, i64 1)
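
Tying the CHECK lines to the runtime call: the icmp ne produces an i1 that is sign-extended to the i32 if_expr value passed as the third argument of __kmpc_parallel_51, so the lowered condition handling boils down to the hedged sketch below. The variable names mirror the FileCheck capture names above, which are ours, not generated symbols:

    #include <stdint.h>

    /* Hedged sketch of the lowered if-clause handling. */
    static int32_t if_expr_from_ifcond(void *ifcond_arg2) {
      int32_t ifcond_tmp2 = *(int32_t *)ifcond_arg2; /* load i32                */
      int32_t ifcond_tmp3 = (ifcond_tmp2 != 0);      /* icmp ne i32 ..., 0      */
      int32_t ifcond_tmp4 = -ifcond_tmp3;            /* sext i1 to i32: 0 or -1 */
      return ifcond_tmp4; /* nonzero if_expr => multi-threaded parallel region */
    }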