Skip to content

Commit bc05314

Browse files
sdesmalen-arm tomtor
authored and committed
[AArch64] Observe Z-reg inline asm clobbers without SVE (llvm#143742)
Inline asm that clobbers any of the z-registers when not in streaming mode should still observe that the lower 128 bits of those registers are clobbered.
1 parent 703c065 commit bc05314

File tree

2 files changed

+102
-9
lines changed

2 files changed

+102
-9
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12281,13 +12281,14 @@ enum class PredicateConstraint { Uph, Upl, Upa };
1228112281
// not what we want. The code here pre-empts this by matching the register
1228212282
// explicitly.
1228312283
static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
12284-
parsePredicateRegAsConstraint(StringRef Constraint) {
12284+
parseSVERegAsConstraint(StringRef Constraint) {
1228512285
if (!Constraint.starts_with('{') || !Constraint.ends_with('}') ||
12286-
Constraint[1] != 'p')
12286+
(Constraint[1] != 'p' && Constraint[1] != 'z'))
1228712287
return std::nullopt;
1228812288

12289+
bool IsPredicate = Constraint[1] == 'p';
1228912290
Constraint = Constraint.substr(2, Constraint.size() - 3);
12290-
bool IsPredicateAsCount = Constraint.starts_with("n");
12291+
bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
1229112292
if (IsPredicateAsCount)
1229212293
Constraint = Constraint.drop_front(1);
1229312294

@@ -12297,8 +12298,9 @@ parsePredicateRegAsConstraint(StringRef Constraint) {
1229712298

1229812299
if (IsPredicateAsCount)
1229912300
return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
12300-
else
12301+
if (IsPredicate)
1230112302
return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
12303+
return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
1230212304
}
1230312305

1230412306
static std::optional<PredicateConstraint>
@@ -12548,8 +12550,16 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
1254812550
break;
1254912551
}
1255012552
} else {
12551-
if (const auto P = parsePredicateRegAsConstraint(Constraint))
12553+
if (const auto P = parseSVERegAsConstraint(Constraint)) {
12554+
// SME functions that are not in streaming mode, should
12555+
// still observe clobbers of Z-registers by clobbering
12556+
// the lower 128bits of those registers.
12557+
if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
12558+
!Subtarget->isSVEorStreamingSVEAvailable())
12559+
return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
12560+
&AArch64::FPR128RegClass);
1255212561
return *P;
12562+
}
1255312563
if (const auto PC = parsePredicateConstraint(Constraint))
1255412564
if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
1255512565
return std::make_pair(0U, RegClass);

llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll

Lines changed: 87 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sme2 -force-streaming -stop-after=finalize-isel | FileCheck %s
1+
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -stop-after=finalize-isel | FileCheck %s
22

3-
define void @UphPNR(target("aarch64.svcount") %predcnt) {
3+
define void @UphPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
44
entry:
55
; CHECK: %0:ppr = COPY $p0
66
; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -14,7 +14,7 @@ entry:
1414
ret void
1515
}
1616

17-
define void @UpaPNR(target("aarch64.svcount") %predcnt) {
17+
define void @UpaPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
1818
entry:
1919
; CHECK: %0:ppr = COPY $p0
2020
; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -28,7 +28,7 @@ entry:
2828
ret void
2929
}
3030

31-
define void @UplPNR(target("aarch64.svcount") %predcnt) {
31+
define void @UplPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
3232
entry:
3333
; CHECK: %0:ppr = COPY $p0
3434
; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -41,3 +41,86 @@ entry:
4141
call void asm sideeffect "fadd z0.h, $0/m, z0.h, #0.5", "@3Upl"(target("aarch64.svcount") %0)
4242
ret void
4343
}
44+
45+
; Test that the z-register clobbers result in preserving %0 across the inline asm call.
46+
define <2 x float> @sme_nosve_nonstreaming(ptr %in) "target-features"="+sme,-sve" {
47+
entry:
48+
; CHECK-LABEL: name: sme_nosve_nonstreaming
49+
; CHECK: INLINEASM &"smstart sm; smstop sm;"
50+
; CHECK-SAME: implicit-def early-clobber $q0
51+
; CHECK-SAME: implicit-def early-clobber $q1
52+
; CHECK-SAME: implicit-def early-clobber $q2
53+
; CHECK-SAME: implicit-def early-clobber $q3
54+
; CHECK-SAME: implicit-def early-clobber $q4
55+
; CHECK-SAME: implicit-def early-clobber $q5
56+
; CHECK-SAME: implicit-def early-clobber $q6
57+
; CHECK-SAME: implicit-def early-clobber $q7
58+
; CHECK-SAME: implicit-def early-clobber $q8
59+
; CHECK-SAME: implicit-def early-clobber $q9
60+
; CHECK-SAME: implicit-def early-clobber $q10
61+
; CHECK-SAME: implicit-def early-clobber $q11
62+
; CHECK-SAME: implicit-def early-clobber $q12
63+
; CHECK-SAME: implicit-def early-clobber $q13
64+
; CHECK-SAME: implicit-def early-clobber $q14
65+
; CHECK-SAME: implicit-def early-clobber $q15
66+
; CHECK-SAME: implicit-def early-clobber $q16
67+
; CHECK-SAME: implicit-def early-clobber $q17
68+
; CHECK-SAME: implicit-def early-clobber $q18
69+
; CHECK-SAME: implicit-def early-clobber $q19
70+
; CHECK-SAME: implicit-def early-clobber $q20
71+
; CHECK-SAME: implicit-def early-clobber $q21
72+
; CHECK-SAME: implicit-def early-clobber $q22
73+
; CHECK-SAME: implicit-def early-clobber $q23
74+
; CHECK-SAME: implicit-def early-clobber $q24
75+
; CHECK-SAME: implicit-def early-clobber $q25
76+
; CHECK-SAME: implicit-def early-clobber $q26
77+
; CHECK-SAME: implicit-def early-clobber $q27
78+
; CHECK-SAME: implicit-def early-clobber $q28
79+
; CHECK-SAME: implicit-def early-clobber $q29
80+
; CHECK-SAME: implicit-def early-clobber $q30
81+
; CHECK-SAME: implicit-def early-clobber $q31
82+
%0 = load <2 x float>, ptr %in, align 8
83+
call void asm sideeffect "smstart sm; smstop sm;", "~{z0},~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"()
84+
ret <2 x float> %0
85+
}
86+
87+
define <2 x float> @sme_nosve_streaming(ptr %in) "target-features"="+sme,-sve" "aarch64_pstate_sm_enabled" {
88+
entry:
89+
; CHECK-LABEL: name: sme_nosve_streaming
90+
; CHECK: INLINEASM &"smstart sm; smstop sm;"
91+
; CHECK-SAME: implicit-def early-clobber $z0
92+
; CHECK-SAME: implicit-def early-clobber $z1
93+
; CHECK-SAME: implicit-def early-clobber $z2
94+
; CHECK-SAME: implicit-def early-clobber $z3
95+
; CHECK-SAME: implicit-def early-clobber $z4
96+
; CHECK-SAME: implicit-def early-clobber $z5
97+
; CHECK-SAME: implicit-def early-clobber $z6
98+
; CHECK-SAME: implicit-def early-clobber $z7
99+
; CHECK-SAME: implicit-def early-clobber $z8
100+
; CHECK-SAME: implicit-def early-clobber $z9
101+
; CHECK-SAME: implicit-def early-clobber $z10
102+
; CHECK-SAME: implicit-def early-clobber $z11
103+
; CHECK-SAME: implicit-def early-clobber $z12
104+
; CHECK-SAME: implicit-def early-clobber $z13
105+
; CHECK-SAME: implicit-def early-clobber $z14
106+
; CHECK-SAME: implicit-def early-clobber $z15
107+
; CHECK-SAME: implicit-def early-clobber $z16
108+
; CHECK-SAME: implicit-def early-clobber $z17
109+
; CHECK-SAME: implicit-def early-clobber $z18
110+
; CHECK-SAME: implicit-def early-clobber $z19
111+
; CHECK-SAME: implicit-def early-clobber $z20
112+
; CHECK-SAME: implicit-def early-clobber $z21
113+
; CHECK-SAME: implicit-def early-clobber $z22
114+
; CHECK-SAME: implicit-def early-clobber $z23
115+
; CHECK-SAME: implicit-def early-clobber $z24
116+
; CHECK-SAME: implicit-def early-clobber $z25
117+
; CHECK-SAME: implicit-def early-clobber $z26
118+
; CHECK-SAME: implicit-def early-clobber $z27
119+
; CHECK-SAME: implicit-def early-clobber $z28
120+
; CHECK-SAME: implicit-def early-clobber $z29
121+
; CHECK-SAME: implicit-def early-clobber $z30
122+
; CHECK-SAME: implicit-def early-clobber $z31
123+
%0 = load <2 x float>, ptr %in, align 8
124+
call void asm sideeffect "smstart sm; smstop sm;", "~{z0},~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"()
125+
ret <2 x float> %0
126+
}

0 commit comments

Comments
 (0)