Skip to content

Commit c52d950

Browse files
committed
[AArch64][SVE] Add asm predicate constraint Uph
Some instructions, such as multi-vector LD1, only accept a predicate-as-counter register in the range PN8-PN15. The new Uph constraint lets inline-asm operands for these instructions be restricted to the upper predicate registers P8-P15, instead of falling back to Upa, which incorrectly allows the whole P0-P15 range of the PPR register class. Differential Revision: https://reviews.llvm.org/D157517
1 parent b108c11 commit c52d950

File tree

5 files changed

+65
-21
lines changed

5 files changed

+65
-21
lines changed

clang/lib/Basic/Targets/AArch64.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,8 +1288,9 @@ bool AArch64TargetInfo::validateAsmConstraint(
12881288
Info.setAllowsRegister();
12891289
return true;
12901290
case 'U':
1291-
if (Name[1] == 'p' && (Name[2] == 'l' || Name[2] == 'a')) {
1292-
// SVE predicate registers ("Upa"=P0-15, "Upl"=P0-P7)
1291+
if (Name[1] == 'p' &&
1292+
(Name[2] == 'l' || Name[2] == 'a' || Name[2] == 'h')) {
1293+
// SVE predicate registers ("Upa"=P0-P15, "Upl"=P0-P7, "Uph"=P8-P15)
12931294
Info.setAllowsRegister();
12941295
Name += 2;
12951296
return true;

clang/test/CodeGen/aarch64-sve-inline-asm-datatypes.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,30 @@ SVBOOL_TEST_UPL(__SVInt32_t, s) ;
168168
SVBOOL_TEST_UPL(__SVInt64_t, d) ;
169169
// CHECK: call <vscale x 16 x i1> asm sideeffect "fadd $0.d, $1.d, $2.d, $3.d\0A", "=w,@3Upl,w,w"(<vscale x 16 x i1> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
170170

171+
#define SVBOOL_TEST_UPH(DT, KIND)\
172+
__SVBool_t func_bool_uph_##KIND(__SVBool_t in1, DT in2, DT in3)\
173+
{\
174+
__SVBool_t out;\
175+
asm volatile (\
176+
"fadd %[out]." #KIND ", %[in1]." #KIND ", %[in2]." #KIND ", %[in3]." #KIND "\n"\
177+
: [out] "=w" (out)\
178+
: [in1] "Uph" (in1),\
179+
[in2] "w" (in2),\
180+
[in3] "w" (in3)\
181+
:);\
182+
return out;\
183+
}
184+
185+
SVBOOL_TEST_UPH(__SVInt8_t, b) ;
186+
// CHECK: call <vscale x 16 x i1> asm sideeffect "fadd $0.b, $1.b, $2.b, $3.b\0A", "=w,@3Uph,w,w"(<vscale x 16 x i1> %in1, <vscale x 16 x i8> %in2, <vscale x 16 x i8> %in3)
187+
SVBOOL_TEST_UPH(__SVInt16_t, h) ;
188+
// CHECK: call <vscale x 16 x i1> asm sideeffect "fadd $0.h, $1.h, $2.h, $3.h\0A", "=w,@3Uph,w,w"(<vscale x 16 x i1> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3)
189+
SVBOOL_TEST_UPH(__SVInt32_t, s) ;
190+
// CHECK: call <vscale x 16 x i1> asm sideeffect "fadd $0.s, $1.s, $2.s, $3.s\0A", "=w,@3Uph,w,w"(<vscale x 16 x i1> %in1, <vscale x 4 x i32> %in2, <vscale x 4 x i32> %in3)
191+
SVBOOL_TEST_UPH(__SVInt64_t, d) ;
192+
// CHECK: call <vscale x 16 x i1> asm sideeffect "fadd $0.d, $1.d, $2.d, $3.d\0A", "=w,@3Uph,w,w"(<vscale x 16 x i1> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
193+
194+
171195
#define SVFLOAT_TEST(DT,KIND)\
172196
DT func_float_##DT##KIND(DT inout1, DT in2)\
173197
{\

llvm/docs/LangRef.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4997,7 +4997,8 @@ AArch64:
49974997
- ``w``: A 32, 64, or 128-bit floating-point, SIMD or SVE vector register.
49984998
- ``x``: Like w, but restricted to registers 0 to 15 inclusive.
49994999
- ``y``: Like w, but restricted to SVE vector registers Z0 to Z7 inclusive.
5000-
- ``Upl``: One of the low eight SVE predicate registers (P0 to P7)
5000+
- ``Uph``: One of the upper eight SVE predicate registers (P8 to P15).
5001+
- ``Upl``: One of the lower eight SVE predicate registers (P0 to P7).
50015002
- ``Upa``: Any of the SVE predicate registers (P0 to P15)
50025003

50035004
AMDGPU:

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9987,19 +9987,31 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
99879987
return "r";
99889988
}
99899989

9990-
enum PredicateConstraint {
9991-
Upl,
9992-
Upa,
9993-
Invalid
9994-
};
9990+
enum PredicateConstraint { Uph, Upl, Upa, Invalid };
99959991

99969992
static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
9997-
PredicateConstraint P = PredicateConstraint::Invalid;
9998-
if (Constraint == "Upa")
9999-
P = PredicateConstraint::Upa;
10000-
if (Constraint == "Upl")
10001-
P = PredicateConstraint::Upl;
10002-
return P;
9993+
return StringSwitch<PredicateConstraint>(Constraint)
9994+
.Case("Uph", PredicateConstraint::Uph)
9995+
.Case("Upl", PredicateConstraint::Upl)
9996+
.Case("Upa", PredicateConstraint::Upa)
9997+
.Default(PredicateConstraint::Invalid);
9998+
}
9999+
10000+
static const TargetRegisterClass *
10001+
getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT) {
10002+
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
10003+
return nullptr;
10004+
10005+
switch (Constraint) {
10006+
default:
10007+
return nullptr;
10008+
case PredicateConstraint::Uph:
10009+
return &AArch64::PPR_p8to15RegClass;
10010+
case PredicateConstraint::Upl:
10011+
return &AArch64::PPR_3bRegClass;
10012+
case PredicateConstraint::Upa:
10013+
return &AArch64::PPRRegClass;
10014+
}
1000310015
}
1000410016

1000510017
// The set of cc code supported is from
@@ -10191,13 +10203,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
1019110203
}
1019210204
} else {
1019310205
PredicateConstraint PC = parsePredicateConstraint(Constraint);
10194-
if (PC != PredicateConstraint::Invalid) {
10195-
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
10196-
return std::make_pair(0U, nullptr);
10197-
bool restricted = (PC == PredicateConstraint::Upl);
10198-
return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
10199-
: std::make_pair(0U, &AArch64::PPRRegClass);
10200-
}
10206+
if (const TargetRegisterClass *RegClass = getPredicateRegisterClass(PC, VT))
10207+
return std::make_pair(0U, RegClass);
1020110208
}
1020210209
if (StringRef("{cc}").equals_insensitive(Constraint) ||
1020310210
parseConstraintCode(Constraint) != AArch64CC::Invalid)

llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,14 @@ define <vscale x 4 x i32> @test_incp(<vscale x 16 x i1> %Pg, <vscale x 4 x i32>
6868
%1 = tail call <vscale x 4 x i32> asm "incp $0.s, $1", "=w,@3Upa,0"(<vscale x 16 x i1> %Pg, <vscale x 4 x i32> %Zn)
6969
ret <vscale x 4 x i32> %1
7070
}
71+
72+
; Function Attrs: nounwind readnone
73+
; CHECK: [[ARG1:%[0-9]+]]:zpr = COPY $z1
74+
; CHECK: [[ARG2:%[0-9]+]]:zpr = COPY $z0
75+
; CHECK: [[ARG3:%[0-9]+]]:ppr = COPY $p0
76+
; CHECK: [[ARG4:%[0-9]+]]:ppr_p8to15 = COPY [[ARG3]]
77+
; CHECK: INLINEASM {{.*}} [[ARG4]]
78+
define <vscale x 8 x half> @test_svfadd_f16_Uph_constraint(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm) {
79+
%1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Uph,w,w"(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
80+
ret <vscale x 8 x half> %1
81+
}

0 commit comments

Comments
 (0)