@@ -236,3 +236,88 @@ func @nested_for_yield(%arg0 : index, %arg1 : index, %arg2 : index) -> f32 {
236
236
}
237
237
return %r : f32
238
238
}
239

// External producer of an i64 value; called from the body of
// @parallel_reduce_loop below to feed the integer reduction.
func @generate() -> i64

// CHECK-LABEL: @simple_parallel_reduce_loop
// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index, %[[INIT:.*]]: f32
func @simple_parallel_reduce_loop(%arg0: index, %arg1: index,
                                  %arg2: index, %arg3: f32) -> f32 {
  // A parallel loop with reduction is converted through sequential loops with
  // reductions into a CFG of blocks where the partially reduced value is
  // passed across as a block argument.

  // Branch to the condition block passing in the initial reduction value.
  // CHECK: br ^[[COND:.*]](%[[LB]], %[[INIT]]

  // Condition branch takes as arguments the current value of the iteration
  // variable and the current partially reduced value.
  // CHECK: ^[[COND]](%[[ITER:.*]]: index, %[[ITER_ARG:.*]]: f32
  // CHECK: %[[COMP:.*]] = cmpi "slt", %[[ITER]], %[[UB]]
  // CHECK: cond_br %[[COMP]], ^[[BODY:.*]], ^[[CONTINUE:.*]]

  // Bodies of loop.reduce operations are folded into the main loop body. The
  // result of this partial reduction is passed as argument to the condition
  // block.
  // CHECK: ^[[BODY]]:
  // CHECK: %[[CST:.*]] = constant 4.2
  // CHECK: %[[PROD:.*]] = mulf %[[ITER_ARG]], %[[CST]]
  // CHECK: %[[INCR:.*]] = addi %[[ITER]], %[[STEP]]
  // CHECK: br ^[[COND]](%[[INCR]], %[[PROD]]

  // The continuation block has access to the (last value of) reduction.
  // CHECK: ^[[CONTINUE]]:
  // CHECK: return %[[ITER_ARG]]
  %0 = loop.parallel (%i) = (%arg0) to (%arg1) step (%arg2) init (%arg3) {
    %cst = constant 42.0 : f32
    loop.reduce(%cst) {
      ^bb0(%lhs: f32, %rhs: f32):
        %1 = mulf %lhs, %rhs : f32
        loop.reduce.return %1 : f32
    } : f32
  } : f32
  return %0 : f32
}

// CHECK-LABEL: parallel_reduce_loop
// CHECK-SAME: %[[INIT1:[0-9A-Za-z_]*]]: f32)
func @parallel_reduce_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                           %arg3 : index, %arg4 : index, %arg5 : f32) -> (f32, i64) {
  // Multiple reduction blocks should be folded in the same body, and the
  // reduction value must be forwarded through block structures.
  // CHECK: %[[INIT2:.*]] = constant 42
  // CHECK: br ^[[COND_OUT:.*]](%{{.*}}, %[[INIT1]], %[[INIT2]]
  // CHECK: ^[[COND_OUT]](%{{.*}}: index, %[[ITER_ARG1_OUT:.*]]: f32, %[[ITER_ARG2_OUT:.*]]: i64
  // CHECK: cond_br %{{.*}}, ^[[BODY_OUT:.*]], ^[[CONT_OUT:.*]]
  // CHECK: ^[[BODY_OUT]]:
  // CHECK: br ^[[COND_IN:.*]](%{{.*}}, %[[ITER_ARG1_OUT]], %[[ITER_ARG2_OUT]]
  // CHECK: ^[[COND_IN]](%{{.*}}: index, %[[ITER_ARG1_IN:.*]]: f32, %[[ITER_ARG2_IN:.*]]: i64
  // CHECK: cond_br %{{.*}}, ^[[BODY_IN:.*]], ^[[CONT_IN:.*]]
  // CHECK: ^[[BODY_IN]]:
  // CHECK: %[[REDUCE1:.*]] = addf %[[ITER_ARG1_IN]], %{{.*}}
  // CHECK: %[[REDUCE2:.*]] = or %[[ITER_ARG2_IN]], %{{.*}}
  // CHECK: br ^[[COND_IN]](%{{.*}}, %[[REDUCE1]], %[[REDUCE2]]
  // CHECK: ^[[CONT_IN]]:
  // CHECK: br ^[[COND_OUT]](%{{.*}}, %[[ITER_ARG1_IN]], %[[ITER_ARG2_IN]]
  // CHECK: ^[[CONT_OUT]]:
  // CHECK: return %[[ITER_ARG1_OUT]], %[[ITER_ARG2_OUT]]
  %step = constant 1 : index
  %init = constant 42 : i64
  %0:2 = loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                       step (%arg4, %step) init (%arg5, %init) {
    %cf = constant 42.0 : f32
    loop.reduce(%cf) {
      ^bb0(%lhs: f32, %rhs: f32):
        %1 = addf %lhs, %rhs : f32
        loop.reduce.return %1 : f32
    } : f32

    %2 = call @generate() : () -> i64
    loop.reduce(%2) {
      ^bb0(%lhs: i64, %rhs: i64):
        %3 = or %lhs, %rhs : i64
        loop.reduce.return %3 : i64
    } : i64
  } : f32, i64
  return %0#0, %0#1 : f32, i64
}