@@ -985,6 +985,33 @@ static bool isAllActivePredicate(Value *Pred) {
985
985
m_ConstantInt<AArch64SVEPredPattern::all>()));
986
986
}
987
987
988
+ // Simplify unary operation where predicate has all inactive lanes by replacing
989
+ // instruction with zeroed object
990
+ static std::optional<Instruction *>
991
+ instCombineSVENoActiveUnaryZero (InstCombiner &IC, IntrinsicInst &II) {
992
+ if (match (II.getOperand (0 ), m_ZeroInt ())) {
993
+ Constant *Node;
994
+ Type *RetTy = II.getType ();
995
+ if (RetTy->isStructTy ()) {
996
+ auto StructT = cast<StructType>(RetTy);
997
+ auto VecT = StructT->getElementType (0 );
998
+ SmallVector<llvm::Constant *, 4 > ZerVec;
999
+ for (unsigned i = 0 ; i < StructT->getNumElements (); i++) {
1000
+ ZerVec.push_back (VecT->isFPOrFPVectorTy () ? ConstantFP::get (VecT, 0.0 )
1001
+ : ConstantInt::get (VecT, 0 ));
1002
+ }
1003
+ Node = ConstantStruct::get (StructT, ZerVec);
1004
+ } else if (RetTy->isFPOrFPVectorTy ())
1005
+ Node = ConstantFP::get (RetTy, 0.0 );
1006
+ else
1007
+ Node = ConstantInt::get (II.getType (), 0 );
1008
+
1009
+ IC.replaceInstUsesWith (II, Node);
1010
+ return IC.eraseInstFromFunction (II);
1011
+ }
1012
+ return std::nullopt;
1013
+ }
1014
+
988
1015
static std::optional<Instruction *> instCombineSVESel (InstCombiner &IC,
989
1016
IntrinsicInst &II) {
990
1017
// svsel(ptrue, x, y) => x
@@ -1398,6 +1425,10 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
1398
1425
Value *PtrOp = II.getOperand (1 );
1399
1426
Type *VecTy = II.getType ();
1400
1427
1428
+ // Replace by zero constant when all lanes are inactive
1429
+ if (auto II_NA = instCombineSVENoActiveUnaryZero (IC, II))
1430
+ return II_NA;
1431
+
1401
1432
if (isAllActivePredicate (Pred)) {
1402
1433
LoadInst *Load = IC.Builder .CreateLoad (VecTy, PtrOp);
1403
1434
Load->copyMetadata (II);
@@ -1745,6 +1776,10 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
1745
1776
Type *Ty = II.getType ();
1746
1777
Value *PassThru = ConstantAggregateZero::get (Ty);
1747
1778
1779
+ // Replace by zero constant when all lanes are inactive
1780
+ if (auto II_NA = instCombineSVENoActiveUnaryZero (IC, II))
1781
+ return II_NA;
1782
+
1748
1783
// Contiguous gather => masked load.
1749
1784
// (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1))
1750
1785
// => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer)
@@ -1971,6 +2006,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
1971
2006
switch (IID) {
1972
2007
default :
1973
2008
break ;
2009
+
2010
+ case Intrinsic::aarch64_sve_ld1_gather:
2011
+ case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
2012
+ case Intrinsic::aarch64_sve_ld1_gather_sxtw:
2013
+ case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
2014
+ case Intrinsic::aarch64_sve_ld1_gather_uxtw:
2015
+ case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
2016
+ case Intrinsic::aarch64_sve_ld1q_gather_index:
2017
+ case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
2018
+ case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
2019
+ case Intrinsic::aarch64_sve_ld1ro:
2020
+ case Intrinsic::aarch64_sve_ld1rq:
2021
+ case Intrinsic::aarch64_sve_ld1udq:
2022
+ case Intrinsic::aarch64_sve_ld1uwq:
2023
+ case Intrinsic::aarch64_sve_ld2_sret:
2024
+ case Intrinsic::aarch64_sve_ld2q_sret:
2025
+ case Intrinsic::aarch64_sve_ld3_sret:
2026
+ case Intrinsic::aarch64_sve_ld3q_sret:
2027
+ case Intrinsic::aarch64_sve_ld4_sret:
2028
+ case Intrinsic::aarch64_sve_ld4q_sret:
2029
+ case Intrinsic::aarch64_sve_ldff1:
2030
+ case Intrinsic::aarch64_sve_ldff1_gather:
2031
+ case Intrinsic::aarch64_sve_ldff1_gather_index:
2032
+ case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
2033
+ case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
2034
+ case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
2035
+ case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
2036
+ case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
2037
+ case Intrinsic::aarch64_sve_ldnf1:
2038
+ case Intrinsic::aarch64_sve_ldnt1:
2039
+ case Intrinsic::aarch64_sve_ldnt1_gather:
2040
+ case Intrinsic::aarch64_sve_ldnt1_gather_index:
2041
+ case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
2042
+ case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
2043
+ return instCombineSVENoActiveUnaryZero (IC, II);
1974
2044
case Intrinsic::aarch64_neon_fmaxnm:
1975
2045
case Intrinsic::aarch64_neon_fminnm:
1976
2046
return instCombineMaxMinNM (IC, II);
0 commit comments