@@ -279,3 +279,75 @@ func.func @avoidable_spill(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %c: vector<
279
279
}
280
280
return
281
281
}
282
+
283
+ // -----
284
+
285
+ // This test is a follow-up to the test of the same name in `tile-allocation-copies.mlir`.
286
+ // This shows the live ranges (which are why we need to split the conditional branch).
287
+
288
+ // CHECK-LIVE-RANGE-LABEL: @cond_branch_with_backedge
289
+ // CHECK-LIVE-RANGE: ^bb1:
290
+ // CHECK-LIVE-RANGE-NEXT: ||| | arith.cmpi
291
+ // CHECK-LIVE-RANGE-NEXT: EEE E cf.cond_br
292
+ //
293
+ // CHECK-LIVE-RANGE-NEXT: ^[[BB3_COPIES:[[:alnum:]]+]]:
294
+ // CHECK-LIVE-RANGE-NEXT: ||| ES arm_sme.copy_tile
295
+ // CHECK-LIVE-RANGE-NEXT: E|| |S arm_sme.copy_tile
296
+ // CHECK-LIVE-RANGE-NEXT: E| ||S arm_sme.copy_tile
297
+ // CHECK-LIVE-RANGE-NEXT: E |||S arm_sme.copy_tile
298
+ // CHECK-LIVE-RANGE-NEXT: EEEE cf.br
299
+ //
300
+ // It is important to note that the first three live ranges in ^bb1 do not end
301
+ // at the `cf.cond_br`; they are live-out via the backedge bb1 -> bb2 -> bb1.
302
+ // This means that if we placed the `arm_sme.copy_tile` ops before the
303
+ // `cf.cond_br`, those live ranges would not end at the copies, resulting in
304
+ // unwanted overlapping live ranges (and hence tile spills).
305
+ //
306
+ // With the conditional branch split and the copies placed in the BB3_COPIES
307
+ // block, the first three live ranges end at the copy operations (as the
308
+ // BB3_COPIES block is on the path out of the loop and has no backedge). This
309
+ // means there are no overlaps and the live ranges all merge, as shown below.
310
+ //
311
+ // CHECK-LIVE-RANGE: ========== Coalesced Live Ranges:
312
+ // CHECK-LIVE-RANGE: ^bb1:
313
+ // CHECK-LIVE-RANGE-NEXT: |||| arith.cmpi
314
+ // CHECK-LIVE-RANGE-NEXT: EEEE cf.cond_br
315
+ //
316
+ // CHECK-LIVE-RANGE-NEXT: ^[[BB3_COPIES]]:
317
+ // CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
318
+ // CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
319
+ // CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
320
+ // CHECK-LIVE-RANGE-NEXT: |||| arm_sme.copy_tile
321
+ // CHECK-LIVE-RANGE-NEXT: EEEE cf.br
322
+
323
+ // CHECK-LABEL: @cond_branch_with_backedge
324
+ // CHECK-NOT: tile_id = 16
325
+ // CHECK: arm_sme.get_tile {tile_id = 0 : i32} : vector<[4]x[4]xf32>
326
+ // CHECK: arm_sme.get_tile {tile_id = 1 : i32} : vector<[4]x[4]xf32>
327
+ // CHECK: arm_sme.get_tile {tile_id = 2 : i32} : vector<[4]x[4]xf32>
328
+ // CHECK: arm_sme.get_tile {tile_id = 3 : i32} : vector<[4]x[4]xf32>
329
+ // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32>
330
+ // CHECK-NOT: tile_id = 16
331
+ func.func @cond_branch_with_backedge(%slice: vector<[4]xf32>) {
332
+ %tileA = arm_sme.get_tile : vector<[4]x[4]xf32>
333
+ %tileB = arm_sme.get_tile : vector<[4]x[4]xf32>
334
+ %tileC = arm_sme.get_tile : vector<[4]x[4]xf32>
335
+ %tileD = arm_sme.get_tile : vector<[4]x[4]xf32>
336
+ %c0 = arith.constant 0 : index
337
+ %c1 = arith.constant 1 : index
338
+ %c10 = arith.constant 10 : index
339
+ // Live here: %tileA, %tileB, %tileC, %tileD
340
+ cf.br ^bb1(%c0, %tileA : index, vector<[4]x[4]xf32>)
341
+ ^bb1(%currentIndex: index, %iterTile: vector<[4]x[4]xf32>):
342
+ %continueLoop = arith.cmpi slt, %currentIndex, %c10 : index
343
+ // Live here: %iterTile, %tileB, %tileC, %tileD
344
+ cf.cond_br %continueLoop, ^bb2, ^bb3(%iterTile, %tileB, %tileC, %tileD : vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>, vector<[4]x[4]xf32>)
345
+ ^bb2:
346
+ // Live here: %iterTile, %tileB, %tileC, %tileD (loop body; branches back to ^bb1)
347
+ %nextTile = arm_sme.move_vector_to_tile_slice %slice, %iterTile, %currentIndex : vector<[4]xf32> into vector<[4]x[4]xf32>
348
+ %nextIndex = arith.addi %currentIndex, %c1 : index
349
+ cf.br ^bb1(%nextIndex, %nextTile : index, vector<[4]x[4]xf32>)
350
+ ^bb3(%finalTileA: vector<[4]x[4]xf32>, %finalTileB: vector<[4]x[4]xf32>, %finalTileC: vector<[4]x[4]xf32>, %finalTileD: vector<[4]x[4]xf32>):
351
+ // Live here: %finalTileA, %finalTileB, %finalTileC, %finalTileD
352
+ return
353
+ }
0 commit comments