Skip to content

[AArch64] Let patterns for NEON instructions check runtime mode. #95560

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22271,7 +22271,8 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
ComparisonResult = DAG.getSExtOrTrunc(ComparisonResult, DL, VecVT);

SmallVector<SDValue, 16> MaskConstants;
if (VecVT == MVT::v16i8) {
if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
VecVT == MVT::v16i8) {
// v16i8 is a special case, as we have 16 entries but only 8 positional bits
// per entry. We split it into two halves, apply the mask, zip the halves to
// create 8x 16-bit values, and then perform the vector reduce.
Expand Down
15 changes: 8 additions & 7 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -7602,25 +7602,26 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
}

let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
Predicate pred = HasNEON> {
let Predicates = [pred] in {
multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
let Predicates = [HasNEONandIsStreamingSafe] in {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
}
let Predicates = [pred, HasFullFP16] in {
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
}
}

let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
let Predicates = [HasNEONandIsStreamingSafe] in {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
[(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
[(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
let Predicates = [HasNEON, HasFullFP16] in {
}
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
[(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn)))]>;
}
Expand Down Expand Up @@ -7880,7 +7881,7 @@ class SIMDMovAlias<string asm, string size, Instruction inst,
multiclass SMov {
// SMOV with vector index of 0 are legal in Scalable Matrix Extension (SME)
// streaming mode.
let Predicates = [HasNEONorSME] in {
let Predicates = [HasNEONandIsStreamingSafe] in {
def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> {
let Inst{20-16} = 0b00001;
}
Expand Down Expand Up @@ -7927,7 +7928,7 @@ multiclass SMov {
multiclass UMov {
// UMOV with vector index of 0 are legal in Scalable Matrix Extension (SME)
// streaming mode.
let Predicates = [HasNEONorSME] in {
let Predicates = [HasNEONandIsStreamingSafe] in {
def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
let Inst{20-16} = 0b00001;
}
Expand Down
39 changes: 18 additions & 21 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPC_IMMO()">,

def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
def HasNEON : Predicate<"Subtarget->isNeonAvailable()">,
AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
Expand Down Expand Up @@ -235,11 +235,10 @@ def HasSMEF16F16orSMEF8F16
"sme-f16f16 or sme-f8f16">;

// A subset of NEON instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
def HasNEONorSME
: Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
"neon or sme">;
// so don't need the additional check for 'isNeonAvailable'.
def HasNEONandIsStreamingSafe
: Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
Expand Down Expand Up @@ -323,8 +322,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
Expand Down Expand Up @@ -1350,7 +1347,7 @@ def : Pat<(v2f32 (int_aarch64_neon_bfdot
VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorSME, HasBF16] in {
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
// Round FP32 to BF16.
def : Pat<(bf16 (any_fpround (f32 FPR32:$Rn))), (BFCVT $Rn)>;
Expand Down Expand Up @@ -5789,9 +5786,9 @@ defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
Expand Down Expand Up @@ -5820,7 +5817,7 @@ let Predicates = [HasRDM] in {

defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
int_aarch64_neon_fmulx,
[HasNEONorSME]>;
[HasNEONandIsStreamingSafe]>;

let Predicates = [HasNEON] in {
def : InstAlias<"cmls $dst, $src1, $src2",
Expand Down Expand Up @@ -5894,9 +5891,9 @@ defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
Expand All @@ -5915,7 +5912,7 @@ def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
(CMLTv1i64rz V64:$Rn)>;

// Round FP64 to BF16.
let Predicates = [HasNEONorSME, HasBF16] in
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
(BFCVT (FCVTXNv1i64 $Rn))>;

Expand Down Expand Up @@ -6016,7 +6013,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
let Predicates = [HasNEON] in {
let Predicates = [HasNEONandIsStreamingSafe] in {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The predicate here doesn't match that used by the instruction's definition (e.g. defm FCVTZS : ...). This looks fine because those definitions don't include any patterns. However, that does suggest the HasNEONandIsStreamingSafe passed into some of those classes (e.g. defm FRECPE : ...) serves no purpose and can be removed rather than changed?

If you keep this change then please update the closing } comment because that still references HasNEON.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, done!

def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
Expand All @@ -6026,7 +6023,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
Expand Down Expand Up @@ -6118,7 +6115,7 @@ def : Pat <(f64 (uint_to_fp (i32
(LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
} // let Predicates = [HasNEON]
} // let Predicates = [HasNEONandIsStreamingSafe]

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
Expand Down Expand Up @@ -8379,7 +8376,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH

// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
let Predicates = [IsNeonAvailable] in {
let Predicates = [HasNEON] in {
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
SDPatternOperator ExtLoad, Instruction LD1>
: Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3351,7 +3351,7 @@ let Predicates = [HasSVEorSME] in {
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;

// Extract element from vector with immediate index that's within the bottom 128-bits.
let Predicates = [IsNeonAvailable], AddedComplexity = 1 in {
let Predicates = [HasNEON], AddedComplexity = 1 in {
def : Pat<(i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)),
(UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
def : Pat<(i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)),
Expand All @@ -3360,9 +3360,9 @@ let Predicates = [HasSVEorSME] in {
(UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
def : Pat<(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)),
(UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
} // End IsNeonAvailable
} // End HasNEON

let Predicates = [IsNeonAvailable] in {
let Predicates = [HasNEON] in {
def : Pat<(sext_inreg (vector_extract nxv16i8:$vec, VectorIndexB:$index), i8),
(SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
def : Pat<(sext_inreg (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index))), i8),
Expand All @@ -3375,7 +3375,7 @@ let Predicates = [HasSVEorSME] in {

def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
(SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
} // End IsNeonAvailable
} // End HasNEON

// Extract first element from vector.
let AddedComplexity = 2 in {
Expand Down
Loading
Loading