7
7
/// Checks that tile spills/reloads are inserted around in-memory tiles (i.e. tiles
8
8
/// that were not assigned a physical SME tile).
9
9
///
10
- /// These spills are currently very naive and paranoid and will spill/reload
11
- /// entire tiles around ArmSME ops.
10
+ /// These spills are currently very naive and will spill/reload entire tiles
11
+ /// around ArmSME ops.
12
12
///
13
13
/// The general pattern is:
14
14
///
34
34
/// Then around the op:
35
35
///
36
36
/// // Swap contents of %tileAlloca and tile 0
37
- /// scf.for %sliceIdx ... {
37
+ /// scf.for %sliceIdx ...
38
38
/// %currentSlice = arm_sme.intr.read.horiz {tile_id = 0}
39
39
/// arm_sme.intr.ld1h.horiz %tileAlloca[%sliceIdx, %c0] {tile_id = 0}
40
40
/// vector.store %currentSlice, %tileAlloca[%sliceIdx, %c0]
41
- /// }
42
41
/// // Execute the op using tile 0
43
42
/// arm_sme.intr.zero
44
43
/// // Swap contents of %tileAlloca and tile 0
45
- /// scf.for %sliceIdx ... {
44
+ /// scf.for %sliceIdx ...
46
45
/// %currentSlice = arm_sme.intr.read.horiz {tile_id = 0}
47
46
/// arm_sme.intr.ld1h.horiz %tileAlloca[%sliceIdx, %c0] {tile_id = 0}
48
47
/// vector.store %currentSlice, %tileAlloca[%sliceIdx, %c0]
49
- /// }
50
48
///
51
49
52
50
// -----
@@ -78,10 +76,12 @@ func.func @use_too_many_tiles() {
78
76
// AFTER-LLVM-LOWERING-SAME: {arm_sme.in_memory_tile_id = 16 : i32} : memref<?x?xi16>
79
77
//
80
78
// AFTER-LLVM-LOWERING-NOT: scf.for
81
- // AFTER-LLVM-LOWERING: arm_sme.intr.zero
79
+ // Note: 17 (0b00010001) is the mask for the 32-bit tile 0 (ZA0.S).
80
+ // AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 17 : i32}>
82
81
//
83
82
// AFTER-LLVM-LOWERING-NOT: scf.for
84
- // AFTER-LLVM-LOWERING: arm_sme.intr.zero
83
+ // Note: 34 (0b00100010) is the mask for the 32-bit tile 1 (ZA1.S).
84
+ // AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 34 : i32}>
85
85
//
86
86
// AFTER-LLVM-LOWERING: scf.for
87
87
// AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_H]] step %[[C1]] {
@@ -92,7 +92,8 @@ func.func @use_too_many_tiles() {
92
92
// AFTER-LLVM-LOWERING-NEXT: "arm_sme.intr.ld1h.horiz"({{.*}}, %[[SLICE_PTR]], {{.*}}) <{tile_id = 0 : i32}>
93
93
// AFTER-LLVM-LOWERING-NEXT: vector.store %[[SLICE]], %[[TILE_ALLOCA]]
94
94
// AFTER-LLVM-LOWERING-NEXT: }
95
- // AFTER-LLVM-LOWERING: arm_sme.intr.zero
95
+ // Note: 85 (0b01010101) is the mask for the 16-bit tile 0 (ZA0.H).
96
+ // AFTER-LLVM-LOWERING: "arm_sme.intr.zero"() <{tile_mask = 85 : i32}>
96
97
// AFTER-LLVM-LOWERING: scf.for
97
98
// AFTER-LLVM-LOWERING-SAME: %[[C0]] to %[[SVL_H]] step %[[C1]] {
98
99
// AFTER-LLVM-LOWERING: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[TILE_ALLOCA]]
0 commit comments