Skip to content

Commit 93831c7

Browse files
[AArch64] Let patterns for NEON instructions check runtime mode. (llvm#95560)
This helps identify any failures where the compiler might otherwise silently emit instructions that are not valid for the given runtime mode.
1 parent 0d524bc commit 93831c7

8 files changed

+313
-883
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22368,7 +22368,8 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
2236822368
ComparisonResult = DAG.getSExtOrTrunc(ComparisonResult, DL, VecVT);
2236922369

2237022370
SmallVector<SDValue, 16> MaskConstants;
22371-
if (VecVT == MVT::v16i8) {
22371+
if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
22372+
VecVT == MVT::v16i8) {
2237222373
// v16i8 is a special case, as we have 16 entries but only 8 positional bits
2237322374
// per entry. We split it into two halves, apply the mask, zip the halves to
2237422375
// create 8x 16-bit values, and then perform the vector reduce.

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7602,25 +7602,26 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
76027602
}
76037603

76047604
let mayRaiseFPException = 1, Uses = [FPCR] in
7605-
multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
7606-
Predicate pred = HasNEON> {
7607-
let Predicates = [pred] in {
7605+
multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
7606+
let Predicates = [HasNEONandIsStreamingSafe] in {
76087607
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
76097608
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
76107609
}
7611-
let Predicates = [pred, HasFullFP16] in {
7610+
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
76127611
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
76137612
}
76147613
}
76157614

76167615
let mayRaiseFPException = 1, Uses = [FPCR] in
76177616
multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
76187617
SDPatternOperator OpNode> {
7618+
let Predicates = [HasNEONandIsStreamingSafe] in {
76197619
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
76207620
[(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
76217621
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
76227622
[(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
7623-
let Predicates = [HasNEON, HasFullFP16] in {
7623+
}
7624+
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
76247625
def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
76257626
[(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn)))]>;
76267627
}
@@ -7880,7 +7881,7 @@ class SIMDMovAlias<string asm, string size, Instruction inst,
78807881
multiclass SMov {
78817882
// SMOV instructions with a vector index of 0 are legal in Scalable Matrix Extension (SME)
78827883
// streaming mode.
7883-
let Predicates = [HasNEONorSME] in {
7884+
let Predicates = [HasNEONandIsStreamingSafe] in {
78847885
def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> {
78857886
let Inst{20-16} = 0b00001;
78867887
}
@@ -7927,7 +7928,7 @@ multiclass SMov {
79277928
multiclass UMov {
79287929
// UMOV instructions with a vector index of 0 are legal in Scalable Matrix Extension (SME)
79297930
// streaming mode.
7930-
let Predicates = [HasNEONorSME] in {
7931+
let Predicates = [HasNEONandIsStreamingSafe] in {
79317932
def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
79327933
let Inst{20-16} = 0b00001;
79337934
}

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,
107107

108108
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
109109
AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
110-
def HasNEON : Predicate<"Subtarget->hasNEON()">,
110+
def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
111111
AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
112112
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
113113
AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
@@ -235,11 +235,10 @@ def HasSMEF16F16orSMEF8F16
235235
"sme-f16f16 or sme-f8f16">;
236236

237237
// A subset of NEON instructions are legal in Streaming SVE execution mode,
238-
// they should be enabled if either has been specified.
239-
def HasNEONorSME
240-
: Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
241-
AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
242-
"neon or sme">;
238+
// so they don't need the additional check for 'isNeonAvailable'.
239+
def HasNEONandIsStreamingSafe
240+
: Predicate<"Subtarget->hasNEON()">,
241+
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
243242
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
244243
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
245244
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -323,8 +322,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
323322

324323
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
325324

326-
def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
327-
328325
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
329326
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
330327
SDTCisInt<1>]>>;
@@ -1350,7 +1347,7 @@ def : Pat<(v2f32 (int_aarch64_neon_bfdot
13501347
VectorIndexS:$idx)>;
13511348
}
13521349

1353-
let Predicates = [HasNEONorSME, HasBF16] in {
1350+
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in {
13541351
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
13551352
// Round FP32 to BF16.
13561353
def : Pat<(bf16 (any_fpround (f32 FPR32:$Rn))), (BFCVT $Rn)>;
@@ -5789,9 +5786,9 @@ defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
57895786
defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
57905787
defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
57915788
defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5792-
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
5793-
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
5794-
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
5789+
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
5790+
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
5791+
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
57955792
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
57965793
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
57975794
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -5820,7 +5817,7 @@ let Predicates = [HasRDM] in {
58205817

58215818
defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
58225819
int_aarch64_neon_fmulx,
5823-
[HasNEONorSME]>;
5820+
[HasNEONandIsStreamingSafe]>;
58245821

58255822
let Predicates = [HasNEON] in {
58265823
def : InstAlias<"cmls $dst, $src1, $src2",
@@ -5894,9 +5891,9 @@ defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
58945891
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
58955892
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
58965893
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
5897-
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>;
5898-
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>;
5899-
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>;
5894+
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
5895+
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
5896+
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
59005897
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
59015898
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
59025899
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
@@ -5915,7 +5912,7 @@ def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
59155912
(CMLTv1i64rz V64:$Rn)>;
59165913

59175914
// Round FP64 to BF16.
5918-
let Predicates = [HasNEONorSME, HasBF16] in
5915+
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
59195916
def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
59205917
(BFCVT (FCVTXNv1i64 $Rn))>;
59215918

@@ -6016,7 +6013,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
60166013
// Some float -> int -> float conversion patterns for which we want to keep the
60176014
// int values in FP registers using the corresponding NEON instructions to
60186015
// avoid more costly int <-> fp register transfers.
6019-
let Predicates = [HasNEON] in {
6016+
let Predicates = [HasNEONandIsStreamingSafe] in {
60206017
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
60216018
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
60226019
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6026,7 +6023,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
60266023
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
60276024
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
60286025

6029-
let Predicates = [HasFullFP16] in {
6026+
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
60306027
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
60316028
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
60326029
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6118,7 +6115,7 @@ def : Pat <(f64 (uint_to_fp (i32
61186115
(LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
61196116
// 64-bits -> double are handled in target specific dag combine:
61206117
// performIntToFpCombine.
6121-
} // let Predicates = [HasNEON]
6118+
} // let Predicates = [HasNEONandIsStreamingSafe]
61226119

61236120
//===----------------------------------------------------------------------===//
61246121
// Advanced SIMD three different-sized vector instructions.
@@ -8379,7 +8376,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
83798376

83808377
// Same as above, but the first element is populated using
83818378
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
8382-
let Predicates = [IsNeonAvailable] in {
8379+
let Predicates = [HasNEON] in {
83838380
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
83848381
SDPatternOperator ExtLoad, Instruction LD1>
83858382
: Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3351,7 +3351,7 @@ let Predicates = [HasSVEorSME] in {
33513351
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
33523352

33533353
// Extract element from vector with immediate index that's within the bottom 128-bits.
3354-
let Predicates = [IsNeonAvailable], AddedComplexity = 1 in {
3354+
let Predicates = [HasNEON], AddedComplexity = 1 in {
33553355
def : Pat<(i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)),
33563356
(UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
33573357
def : Pat<(i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)),
@@ -3360,9 +3360,9 @@ let Predicates = [HasSVEorSME] in {
33603360
(UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
33613361
def : Pat<(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)),
33623362
(UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
3363-
} // End IsNeonAvailable
3363+
} // End HasNEON
33643364

3365-
let Predicates = [IsNeonAvailable] in {
3365+
let Predicates = [HasNEON] in {
33663366
def : Pat<(sext_inreg (vector_extract nxv16i8:$vec, VectorIndexB:$index), i8),
33673367
(SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
33683368
def : Pat<(sext_inreg (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index))), i8),
@@ -3375,7 +3375,7 @@ let Predicates = [HasSVEorSME] in {
33753375

33763376
def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
33773377
(SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
3378-
} // End IsNeonAvailable
3378+
} // End HasNEON
33793379

33803380
// Extract first element from vector.
33813381
let AddedComplexity = 2 in {

0 commit comments

Comments
 (0)