
Commit 1d066b4

sdesmalen-arm authored and AlexisPerry committed
[AArch64] Consider runtime mode when deciding to use SVE for fixed-length vectors. (llvm#96081)
This also fixes the case where an SVE div was incorrectly assumed to be available in non-streaming mode with SME.
1 parent 0c2a4fa commit 1d066b4
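
At its core, the patch swaps mode-blind feature checks like hasSVEorSME() for a predicate that also considers the runtime streaming mode (PSTATE.SM). Below is a minimal standalone sketch of the old and new logic, assuming a simplified subtarget model; the real definition of isSVEorStreamingSVEAvailable() appears in the AArch64Subtarget.h hunk further down.

#include <iostream>

// Simplified stand-in for AArch64Subtarget (illustrative only).
struct SubtargetModel {
  bool HasSVE;      // +sve feature
  bool HasSME;      // +sme feature (streaming SVE)
  bool IsStreaming; // function executes with PSTATE.SM == 1

  // Old check: any SVE- or SME-capable target passed, regardless of mode.
  bool hasSVEorSME() const { return HasSVE || HasSME; }

  // New check from this commit: SME's streaming SVE only counts when the
  // function actually runs in streaming mode.
  bool isSVEorStreamingSVEAvailable() const {
    return HasSVE || (HasSME && IsStreaming);
  }
};

int main() {
  // The problem case from the commit message: SME without SVE, running a
  // non-streaming function. No SVE instructions (e.g. an SVE integer div)
  // are usable here, but the old predicate said they were.
  SubtargetModel ST{/*HasSVE=*/false, /*HasSME=*/true, /*IsStreaming=*/false};
  std::cout << "old: " << ST.hasSVEorSME()                    // 1 (wrong)
            << ", new: " << ST.isSVEorStreamingSVEAvailable() // 0 (correct)
            << '\n';
}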

File tree: 56 files changed, +441 -379 lines (only three of the 56 changed files are shown below)


llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 32 additions & 28 deletions
@@ -1418,7 +1418,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }

-  if (Subtarget->hasSVEorSME()) {
+  if (Subtarget->isSVEorStreamingSVEAvailable()) {
     for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
       setOperationAction(ISD::BITREVERSE, VT, Custom);
       setOperationAction(ISD::BSWAP, VT, Custom);
@@ -1430,8 +1430,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
-      setOperationAction(ISD::MGATHER, VT, Custom);
-      setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::MUL, VT, Custom);
       setOperationAction(ISD::MULHS, VT, Custom);
@@ -1528,14 +1526,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }

-  // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
-  for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
-                  MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
-                  MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+  // NEON doesn't support masked loads/stores, but SME and SVE do.
+  for (auto VT :
+       {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
+        MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+        MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
     setOperationAction(ISD::MLOAD, VT, Custom);
     setOperationAction(ISD::MSTORE, VT, Custom);
-    setOperationAction(ISD::MGATHER, VT, Custom);
-    setOperationAction(ISD::MSCATTER, VT, Custom);
   }

   // Firstly, exclude all scalable vector extending loads/truncating stores,
@@ -1576,8 +1573,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                  MVT::nxv4f32, MVT::nxv2f64}) {
     setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
     setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
-    setOperationAction(ISD::MGATHER, VT, Custom);
-    setOperationAction(ISD::MSCATTER, VT, Custom);
     setOperationAction(ISD::MLOAD, VT, Custom);
     setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
     setOperationAction(ISD::SELECT, VT, Custom);
@@ -1611,8 +1606,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
     setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
     setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
-    if (Subtarget->isSVEAvailable())
-      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
     setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
     setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
     setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -1650,8 +1643,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,

   for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
     setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
-    setOperationAction(ISD::MGATHER, VT, Custom);
-    setOperationAction(ISD::MSCATTER, VT, Custom);
     setOperationAction(ISD::MLOAD, VT, Custom);
     setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
     setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -1675,18 +1666,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::MUL, MVT::v1i64, Custom);
   setOperationAction(ISD::MUL, MVT::v2i64, Custom);

-  if (Subtarget->isSVEAvailable()) {
-    // NEON doesn't support across-vector reductions, but SVE does.
-    for (auto VT :
-         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
-      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
-  }
-
-  // Histcnt is SVE2 only
-  if (Subtarget->hasSVE2() && Subtarget->isSVEAvailable())
-    setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::Other,
-                       Custom);
-
   // NOTE: Currently this has to happen after computeRegisterProperties rather
   // than the preferred option of combining it with the addRegisterClass call.
   if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1762,6 +1741,31 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INTRINSIC_WO_CHAIN, VT, Custom);
   }

+  // Handle operations that are only available in non-streaming SVE mode.
+  if (Subtarget->isSVEAvailable()) {
+    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
+                    MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
+                    MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
+                    MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
+                    MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
+                    MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+                    MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+      setOperationAction(ISD::MGATHER, VT, Custom);
+      setOperationAction(ISD::MSCATTER, VT, Custom);
+    }
+
+    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
+                    MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
+                    MVT::v2f32, MVT::v4f32, MVT::v2f64})
+      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+
+    // Histcnt is SVE2 only
+    if (Subtarget->hasSVE2())
+      setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::Other,
+                         Custom);
+  }
+
+
   if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
     // Only required for llvm.aarch64.mops.memset.tag
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
@@ -6986,7 +6990,7 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(

   // NEON-sized vectors can be emulated using SVE instructions.
   if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
-    return Subtarget->hasSVEorSME();
+    return Subtarget->isSVEorStreamingSVEAvailable();

   // Ensure NEON MVTs only belong to a single register class.
   if (VT.getFixedSizeInBits() <= 128)
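
Taken together, the AArch64ISelLowering.cpp hunks are a regrouping rather than new functionality: masked loads/stores remain available whenever any form of SVE is usable, while masked gathers/scatters, strictly ordered reductions (VECREDUCE_SEQ_FADD), and the SVE2 histogram node move under a single isSVEAvailable() block, since they have no streaming-mode encodings. A condensed toy sketch of the split, approximating isSVEAvailable() as non-streaming SVE (the model types below are hypothetical, not LLVM's API):

#include <cstdio>

// Toy model of the registration split (illustrative only).
struct Mode { bool SVE, SME, Streaming; };

bool sveOrStreamingSVE(Mode M) { return M.SVE || (M.SME && M.Streaming); }
bool nonStreamingSVE(Mode M) { return M.SVE && !M.Streaming; } // ~isSVEAvailable()

void registerOps(Mode M) {
  if (sveOrStreamingSVE(M)) // legal in streaming and non-streaming mode
    std::puts("  MLOAD/MSTORE -> Custom");
  if (nonStreamingSVE(M)) { // no streaming-mode encodings for these
    std::puts("  MGATHER/MSCATTER -> Custom");
    std::puts("  VECREDUCE_SEQ_FADD -> Custom");
  }
}

int main() {
  std::puts("streaming SME, no SVE:");
  registerOps({false, true, true});  // masked load/store only
  std::puts("non-streaming SVE:");
  registerOps({true, false, false}); // everything
}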

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 9 additions & 3 deletions
@@ -185,6 +185,12 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
            (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
   }

+  /// Returns true if the target has access to either the full range of SVE
+  /// instructions, or the streaming-compatible subset of SVE instructions.
+  bool isSVEorStreamingSVEAvailable() const {
+    return hasSVE() || (hasSME() && isStreaming());
+  }
+
   unsigned getMinVectorRegisterBitWidth() const {
     // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
     // we don't yet support streaming-compatible codegen support that we trust
@@ -374,11 +380,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   }

   bool useSVEForFixedLengthVectors() const {
-    if (!isNeonAvailable())
-      return hasSVEorSME();
+    if (!isSVEorStreamingSVEAvailable())
+      return false;

     // Prefer NEON unless larger SVE registers are available.
-    return hasSVEorSME() && getMinSVEVectorSizeInBits() >= 256;
+    return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
   }

   bool useSVEForFixedLengthVectors(EVT VT) const {
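
The rewritten useSVEForFixedLengthVectors() now fails fast when no flavour of SVE is usable, and only then applies the NEON-versus-SVE size preference. A standalone sketch of the new decision, under the simplifying assumption that NEON is unavailable exactly when the function is streaming:

#include <cassert>

// Illustrative model; MinSVEBits stands in for getMinSVEVectorSizeInBits().
struct Model {
  bool SVE, SME, Streaming;
  unsigned MinSVEBits;

  bool isSVEorStreamingSVEAvailable() const { return SVE || (SME && Streaming); }
  bool isNeonAvailable() const { return !Streaming; } // simplifying assumption

  bool useSVEForFixedLengthVectors() const {
    if (!isSVEorStreamingSVEAvailable())
      return false; // the old code could return true here for SME-only targets
    // Prefer NEON unless larger SVE registers are available.
    return !isNeonAvailable() || MinSVEBits >= 256;
  }
};

int main() {
  // SME-only, non-streaming: nothing SVE-like is usable -> false (the fix).
  assert(!Model{false, true, false, 512}.useSVEForFixedLengthVectors());
  // SME-only, streaming: streaming SVE is usable and NEON is not -> true.
  assert(Model{false, true, true, 0}.useSVEForFixedLengthVectors());
  // SVE with at least 256-bit registers guaranteed -> true.
  assert(Model{true, false, false, 256}.useSVEForFixedLengthVectors());
  return 0;
}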

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 9 additions & 4 deletions
@@ -187,6 +187,11 @@ static cl::opt<unsigned> SVEVectorBitsMinOpt(
              "with zero meaning no minimum size is assumed."),
     cl::init(0), cl::Hidden);

+static cl::opt<bool> ForceStreaming(
+    "force-streaming",
+    cl::desc("Force the use of streaming code for all functions"),
+    cl::init(false), cl::Hidden);
+
 static cl::opt<bool> ForceStreamingCompatible(
     "force-streaming-compatible",
     cl::desc("Force the use of streaming-compatible code for all functions"),
@@ -412,11 +417,11 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
   StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
   bool HasMinSize = F.hasMinSize();

-  bool IsStreaming = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
+  bool IsStreaming = ForceStreaming ||
+                     F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
                      F.hasFnAttribute("aarch64_pstate_sm_body");
-  bool IsStreamingCompatible =
-      F.hasFnAttribute("aarch64_pstate_sm_compatible") ||
-      ForceStreamingCompatible;
+  bool IsStreamingCompatible = ForceStreamingCompatible ||
+                               F.hasFnAttribute("aarch64_pstate_sm_compatible");

   unsigned MinSVEVectorSize = 0;
   unsigned MaxSVEVectorSize = 0;
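
The new -force-streaming option mirrors the existing -force-streaming-compatible flag: when set, getSubtargetImpl treats every function as streaming, just as if it carried the aarch64_pstate_sm_enabled or aarch64_pstate_sm_body attribute. A plausible way to exercise streaming-mode codegen paths by hand (an assumed invocation with a hypothetical test.ll; the flag is a hidden cl::opt, but llc accepts hidden options like any other):

llc -mtriple=aarch64 -mattr=+sme -force-streaming < test.ll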

0 commit comments