Skip to content

Commit 3172366

Browse files
committed
[AArch64] Let patterns for NEON instructions check runtime mode.
This helps identify any failures where the compiler might otherwise silently emit instructions that are not valid for the given runtime mode. We can probably do a similar thing for HasSVE predicates.
1 parent f484c79 commit 3172366

File tree

6 files changed

+289
-862
lines changed

6 files changed

+289
-862
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22185,7 +22185,8 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
2218522185
ComparisonResult = DAG.getSExtOrTrunc(ComparisonResult, DL, VecVT);
2218622186

2218722187
SmallVector<SDValue, 16> MaskConstants;
22188-
if (VecVT == MVT::v16i8) {
22188+
if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
22189+
VecVT == MVT::v16i8) {
2218922190
// v16i8 is a special case, as we have 16 entries but only 8 positional bits
2219022191
// per entry. We split it into two halves, apply the mask, zip the halves to
2219122192
// create 8x 16-bit values, and then perform the vector reduce.

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 6 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
107107

108108
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
109109
AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
110-
def HasNEON : Predicate<"Subtarget->hasNEON()">,
110+
def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
111111
AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
112112
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
113113
AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
@@ -234,12 +234,9 @@ def HasSMEF16F16orSMEF8F16
234234
AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
235235
"sme-f16f16 or sme-f8f16">;
236236

237-
// A subset of NEON instructions are legal in Streaming SVE execution mode,
238-
// they should be enabled if either has been specified.
239-
def HasNEONorSME
240-
: Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
241-
AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
242-
"neon or sme">;
237+
// A subset of NEON instructions are legal in Streaming SVE execution mode.
238+
def HasNEONorSME : Predicate<"Subtarget->hasNEON()">,
239+
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
243240
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
244241
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
245242
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -323,8 +320,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
323320

324321
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
325322

326-
def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
327-
328323
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
329324
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
330325
SDTCisInt<1>]>>;
@@ -5934,7 +5929,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
59345929
// Some float -> int -> float conversion patterns for which we want to keep the
59355930
// int values in FP registers using the corresponding NEON instructions to
59365931
// avoid more costly int <-> fp register transfers.
5937-
let Predicates = [HasNEON] in {
5932+
let Predicates = [HasNEONorSME] in {
59385933
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
59395934
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
59405935
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -8297,7 +8292,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
82978292

82988293
// Same as above, but the first element is populated using
82998294
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
8300-
let Predicates = [IsNeonAvailable] in {
8295+
let Predicates = [HasNeonOrSME] in {
83018296
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
83028297
SDPatternOperator ExtLoad, Instruction LD1>
83038298
: Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),

0 commit comments

Comments
 (0)