@@ -27,13 +27,15 @@ func.func @reduction1(%arg0 : index, %arg1 : index, %arg2 : index,
27
27
%zero = arith.constant 0.0 : f32
28
28
// CHECK: omp.parallel
29
29
// CHECK: omp.wsloop
30
- // CHECK-SAME: reduction(@[[$REDF]] -> %[[BUF]]
30
+ // CHECK-SAME: reduction(@[[$REDF]] %[[BUF]] -> %[[PVT_BUF:[a-z0-9]+]]
31
31
// CHECK: memref.alloca_scope
32
32
scf.parallel (%i0 , %i1 ) = (%arg0 , %arg1 ) to (%arg2 , %arg3 )
33
33
step (%arg4 , %step ) init (%zero ) -> (f32 ) {
34
34
// CHECK: %[[CST_INNER:.*]] = arith.constant 1.0
35
35
%one = arith.constant 1.0 : f32
36
- // CHECK: omp.reduction %[[CST_INNER]], %[[BUF]]
36
+ // CHECK: %[[PVT_VAL:.*]] = llvm.load %[[PVT_BUF]] : !llvm.ptr -> f32
37
+ // CHECK: %[[ADD_RESULT:.*]] = arith.addf %[[PVT_VAL]], %[[CST_INNER]] : f32
38
+ // CHECK: llvm.store %[[ADD_RESULT]], %[[PVT_BUF]] : f32, !llvm.ptr
37
39
scf.reduce (%one : f32 ) {
38
40
^bb0 (%lhs : f32 , %rhs: f32 ):
39
41
%res = arith.addf %lhs , %rhs : f32
@@ -103,10 +105,15 @@ func.func @reduction_muli(%arg0 : index, %arg1 : index, %arg2 : index,
103
105
%arg3 : index , %arg4 : index ) {
104
106
%step = arith.constant 1 : index
105
107
%one = arith.constant 1 : i32
108
+ // CHECK: %[[RED_VAR:.*]] = llvm.alloca %{{.*}} x i32 : (i64) -> !llvm.ptr
109
+ // CHECK: omp.wsloop reduction(@[[$REDI]] %[[RED_VAR]] -> %[[RED_PVT_VAR:.*]] : !llvm.ptr)
106
110
scf.parallel (%i0 , %i1 ) = (%arg0 , %arg1 ) to (%arg2 , %arg3 )
107
111
step (%arg4 , %step ) init (%one ) -> (i32 ) {
108
- // CHECK: omp.reduction
112
+ // CHECK: %[[C2:.*]] = arith.constant 2 : i32
109
113
%pow2 = arith.constant 2 : i32
114
+ // CHECK: %[[RED_PVT_VAL:.*]] = llvm.load %[[RED_PVT_VAR]] : !llvm.ptr -> i32
115
+ // CHECK: %[[MUL_RESULT:.*]] = arith.muli %[[RED_PVT_VAL]], %[[C2]] : i32
116
+ // CHECK: llvm.store %[[MUL_RESULT]], %[[RED_PVT_VAR]] : i32, !llvm.ptr
110
117
scf.reduce (%pow2 : i32 ) {
111
118
^bb0 (%lhs : i32 , %rhs: i32 ):
112
119
%res = arith.muli %lhs , %rhs : i32
@@ -199,16 +206,23 @@ func.func @reduction4(%arg0 : index, %arg1 : index, %arg2 : index,
199
206
200
207
// CHECK: omp.parallel
201
208
// CHECK: omp.wsloop
202
- // CHECK-SAME: reduction(@[[$REDF1]] -> %[[BUF1]]
203
- // CHECK-SAME: @[[$REDF2]] -> %[[BUF2]]
209
+ // CHECK-SAME: reduction(@[[$REDF1]] %[[BUF1]] -> %[[PVT_BUF1:[a-z0-9]+]]
210
+ // CHECK-SAME: @[[$REDF2]] %[[BUF2]] -> %[[PVT_BUF2:[a-z0-9]+]]
204
211
// CHECK: memref.alloca_scope
205
212
%res:2 = scf.parallel (%i0 , %i1 ) = (%arg0 , %arg1 ) to (%arg2 , %arg3 )
206
213
step (%arg4 , %step ) init (%zero , %ione ) -> (f32 , i64 ) {
214
+ // CHECK: %[[CST_ONE:.*]] = arith.constant 1.0{{.*}} : f32
207
215
%one = arith.constant 1.0 : f32
208
- // CHECK: arith.fptosi
216
+ // CHECK: %[[CST_INT_ONE:.*]] = arith.fptosi
209
217
%1 = arith.fptosi %one : f32 to i64
210
- // CHECK: omp.reduction %{{.*}}, %[[BUF1]]
211
- // CHECK: omp.reduction %{{.*}}, %[[BUF2]]
218
+ // CHECK: %[[PVT_VAL1:.*]] = llvm.load %[[PVT_BUF1]] : !llvm.ptr -> f32
219
+ // CHECK: %[[TEMP1:.*]] = arith.cmpf oge, %[[PVT_VAL1]], %[[CST_ONE]] : f32
220
+ // CHECK: %[[CMP_VAL1:.*]] = arith.select %[[TEMP1]], %[[PVT_VAL1]], %[[CST_ONE]] : f32
221
+ // CHECK: llvm.store %[[CMP_VAL1]], %[[PVT_BUF1]] : f32, !llvm.ptr
222
+ // CHECK: %[[PVT_VAL2:.*]] = llvm.load %[[PVT_BUF2]] : !llvm.ptr -> i64
223
+ // CHECK: %[[TEMP2:.*]] = arith.cmpi slt, %[[PVT_VAL2]], %[[CST_INT_ONE]] : i64
224
+ // CHECK: %[[CMP_VAL2:.*]] = arith.select %[[TEMP2]], %[[CST_INT_ONE]], %[[PVT_VAL2]] : i64
225
+ // CHECK: llvm.store %[[CMP_VAL2]], %[[PVT_BUF2]] : i64, !llvm.ptr
212
226
scf.reduce (%one , %1 : f32 , i64 ) {
213
227
^bb0 (%lhs : f32 , %rhs: f32 ):
214
228
%cmp = arith.cmpf oge , %lhs , %rhs : f32
0 commit comments