Skip to content

Commit 4a33a6f

Browse files
committed
[AArch64] Let patterns for NEON instructions check runtime mode.
This helps identify any failures where the compiler might otherwise silently emit instructions that are not valid for the given runtime mode. We can probably do a similar thing for HasSVE predicates.
1 parent 534f856 commit 4a33a6f

File tree

6 files changed

+289
-862
lines changed

6 files changed

+289
-862
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22271,7 +22271,8 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
2227122271
ComparisonResult = DAG.getSExtOrTrunc(ComparisonResult, DL, VecVT);
2227222272

2227322273
SmallVector<SDValue, 16> MaskConstants;
22274-
if (VecVT == MVT::v16i8) {
22274+
if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
22275+
VecVT == MVT::v16i8) {
2227522276
// v16i8 is a special case, as we have 16 entries but only 8 positional bits
2227622277
// per entry. We split it into two halves, apply the mask, zip the halves to
2227722278
// create 8x 16-bit values, and then perform the vector reduce.

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
107107

108108
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
109109
AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
110-
def HasNEON : Predicate<"Subtarget->hasNEON()">,
110+
def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
111111
AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
112112
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
113113
AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
@@ -234,12 +234,9 @@ def HasSMEF16F16orSMEF8F16
234234
AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
235235
"sme-f16f16 or sme-f8f16">;
236236

237-
// A subset of NEON instructions are legal in Streaming SVE execution mode,
238-
// they should be enabled if either has been specified.
239-
def HasNEONorSME
240-
: Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
241-
AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
242-
"neon or sme">;
237+
// A subset of NEON instructions are legal in Streaming SVE execution mode.
238+
def HasNEONorSME : Predicate<"Subtarget->hasNEON()">,
239+
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
243240
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
244241
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
245242
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -323,8 +320,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
323320

324321
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
325322

326-
def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
327-
328323
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
329324
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
330325
SDTCisInt<1>]>>;
@@ -6016,7 +6011,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
60166011
// Some float -> int -> float conversion patterns for which we want to keep the
60176012
// int values in FP registers using the corresponding NEON instructions to
60186013
// avoid more costly int <-> fp register transfers.
6019-
let Predicates = [HasNEON] in {
6014+
let Predicates = [HasNEONorSME] in {
60206015
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
60216016
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
60226017
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -8379,7 +8374,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
83798374

83808375
// Same as above, but the first element is populated using
83818376
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
8382-
let Predicates = [IsNeonAvailable] in {
8377+
let Predicates = [HasNEONorSME] in {
83838378
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
83848379
SDPatternOperator ExtLoad, Instruction LD1>
83858380
: Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),

0 commit comments

Comments
 (0)