Skip to content

Commit 7fad304

Browse files
[AArch64][SME] Make coalescer barrier available without +sme. (#85311)
For each call that changes the streaming mode, ISel inserts a COALESCER_BARRIER node for the FP and (non-scalable) vector arguments to the callee. When calling a non-streaming function from a streaming-compatible function, +sme is not required (the SME code path may never actually be executed at runtime). However, the patterns that match COALESCER_BARRIER were still predicated on `HasSME`, which is incorrect. This patch fixes that by ending the `let Predicates = [HasSME]` block before the COALESCER_BARRIER definitions, so they are no longer guarded by it.
1 parent 68342ed commit 7fad304

File tree

2 files changed

+41
-2
lines changed

2 files changed

+41
-2
lines changed

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
164164
def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
165165
(MRS 0xde85)>;
166166

167+
} // End let Predicates = [HasSME]
168+
167169
multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> {
168170
def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> {
169171
let Constraints = "$dst = $src";
@@ -183,8 +185,6 @@ multiclass CoalescerBarriers {
183185

184186
defm COALESCER_BARRIER : CoalescerBarriers;
185187

186-
} // End let Predicates = [HasSME]
187-
188188
// Pseudo to match to smstart/smstop. This expands:
189189
//
190190
// pseudonode (pstate_za|pstate_sm), before_call, expected_value

llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,43 @@ define void @streaming_compatible() #0 {
3838

3939
declare void @non_streaming()
4040

41+
42+
; Verify that COALESCER_BARRIER is also supported without +sme.
43+
44+
define void @streaming_compatible_arg(float %f) #0 {
45+
; CHECK-LABEL: streaming_compatible_arg:
46+
; CHECK: // %bb.0:
47+
; CHECK-NEXT: sub sp, sp, #96
48+
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
49+
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
50+
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
51+
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
52+
; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
53+
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
54+
; CHECK-NEXT: bl __arm_sme_state
55+
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
56+
; CHECK-NEXT: and x19, x0, #0x1
57+
; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
58+
; CHECK-NEXT: tbz w19, #0, .LBB1_2
59+
; CHECK-NEXT: // %bb.1:
60+
; CHECK-NEXT: smstop sm
61+
; CHECK-NEXT: .LBB1_2:
62+
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
63+
; CHECK-NEXT: bl non_streaming
64+
; CHECK-NEXT: tbz w19, #0, .LBB1_4
65+
; CHECK-NEXT: // %bb.3:
66+
; CHECK-NEXT: smstart sm
67+
; CHECK-NEXT: .LBB1_4:
68+
; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
69+
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
70+
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
71+
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
72+
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
73+
; CHECK-NEXT: add sp, sp, #96
74+
; CHECK-NEXT: ret
75+
call void @non_streaming(float %f)
76+
ret void
77+
}
78+
79+
4180
attributes #0 = { nounwind "aarch64_pstate_sm_compatible" }

0 commit comments

Comments
 (0)