
[AArch64][SVE] optimisation for unary SVE intrinsics with no active lanes #86651


Open

Lukacma wants to merge 3 commits into main

Conversation

Lukacma
Contributor

@Lukacma Lukacma commented Mar 26, 2024

This patch extends #73964 and adds optimisation of unary intrinsics.

@llvmbot
Member

llvmbot commented Mar 26, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-aarch64

Author: None (Lukacma)

Changes

This patch extends #73964 and adds optimisation of unary intrinsics.
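For context, a minimal sketch (not part of the patch itself) of the kind of fold this enables, modelled on the svabs test added in the new test file: when the governing predicate of a merging (_m) unary intrinsic is all inactive, the call simply yields its passthrough operand, so InstCombine can replace the call outright. The function name fold_abs_m below is illustrative.

; The all-false predicate means no lanes are modified, so the call folds to %a.
define <vscale x 16 x i8> @fold_abs_m(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
  %r = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %r
}
; After -passes=instcombine the body is simply: ret <vscale x 16 x i8> %a

declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)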


Patch is 169.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86651.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+369)
  • (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll (+3291-1)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index af0b6873d170dd..40bd17053d34cd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -923,6 +923,80 @@ static bool isAllActivePredicate(Value *Pred) {
                          m_ConstantInt<AArch64SVEPredPattern::all>()));
 }
 
+// Simplify a unary operation whose predicate has all lanes inactive by
+// replacing the instruction with the appropriate operand
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II,
+                                   bool hasInactiveVector) {
+  int PredOperand = hasInactiveVector ? 1 : 0;
+  int ReplaceOperand = hasInactiveVector ? 0 : 1;
+  if (match(II.getOperand(PredOperand), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Simplify unary operation where predicate has all inactive lanes by replacing
+// the instruction with a zero constant of the return type
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryZero(InstCombiner &IC, IntrinsicInst &II) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    Constant *Node;
+    Type *RetTy = II.getType();
+    if (RetTy->isStructTy()) {
+      auto *StructT = cast<StructType>(RetTy);
+      Type *VecT = StructT->getElementType(0);
+      SmallVector<Constant *, 4> ZeroVec;
+      for (unsigned I = 0; I < StructT->getNumElements(); I++)
+        ZeroVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
+                                                   : ConstantInt::get(VecT, 0));
+      Node = ConstantStruct::get(StructT, ZeroVec);
+    } else if (RetTy->isFPOrFPVectorTy())
+      Node = ConstantFP::get(RetTy, 0.0);
+    else
+      Node = ConstantInt::get(RetTy, 0);
+
+    IC.replaceInstUsesWith(II, Node);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Erase unary operation where predicate has all inactive lanes
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
+                                 int PredPos) {
+  if (match(II.getOperand(PredPos), m_ZeroInt()))
+    return IC.eraseInstFromFunction(II);
+  return std::nullopt;
+}
+
+// Simplify unary operation where predicate has all inactive lanes by replacing
+// instruction with given constant
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryConstant(InstCombiner &IC, IntrinsicInst &II,
+                                    Constant *NewVal) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, NewVal);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Simplify unary operation where predicate has all inactive lanes, or try to
+// replace with the _x form when all lanes are active
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
+  if (isAllActivePredicate(II.getOperand(1)) &&
+      !isa<llvm::UndefValue>(II.getOperand(0))) {
+    Value *Undef = llvm::UndefValue::get(II.getType());
+    return IC.replaceOperand(II, 0, Undef);
+  }
+  return instCombineSVENoActiveUnaryReplace(IC, II, true);
+}
+
 static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
                                                       IntrinsicInst &II) {
   // svsel(ptrue, x, y) => x
@@ -937,6 +1011,12 @@ static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
 
 static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
                                                       IntrinsicInst &II) {
+  // Optimize when predicate is known all active or all inactive
+  if (auto II_NA = instCombineSVEAllOrNoActiveUnary(IC, II))
+    return II_NA;
+
   IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
   if (!Pg)
     return std::nullopt;
@@ -971,6 +1051,12 @@ static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
 
 static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
                                                         IntrinsicInst &II) {
+  // Replace by zero constant when all lanes are inactive
+  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
   LLVMContext &Ctx = II.getContext();
 
   // Check that the predicate is all active
@@ -1156,6 +1242,11 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
   Value *Vec = II.getArgOperand(2);
   Type *Ty = II.getType();
 
+  // If all lanes are inactive replace with operand
+  if (auto II_NA = instCombineSVENoActiveUnaryReplace(IC, II, false))
+    return II_NA;
+
   if (!Ty->isIntegerTy())
     return std::nullopt;
 
@@ -1336,6 +1427,11 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
   Value *PtrOp = II.getOperand(1);
   Type *VecTy = II.getType();
 
+  // Replace by zero constant when all lanes are inactive
+  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
   if (isAllActivePredicate(Pred)) {
     LoadInst *Load = IC.Builder.CreateLoad(VecTy, PtrOp);
     Load->copyMetadata(II);
@@ -1355,6 +1451,11 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
   Value *Pred = II.getOperand(1);
   Value *PtrOp = II.getOperand(2);
 
+  // Remove when all lanes are inactive (the predicate is operand 1 of st1)
+  if (auto II_NA = instCombineSVENoActiveUnaryErase(IC, II, 1))
+    return II_NA;
+
   if (isAllActivePredicate(Pred)) {
     StoreInst *Store = IC.Builder.CreateStore(VecOp, PtrOp);
     Store->copyMetadata(II);
@@ -1653,6 +1754,11 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
   Type *Ty = II.getType();
   Value *PassThru = ConstantAggregateZero::get(Ty);
 
+  // Replace by zero constant when all lanes are inactive
+  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
   // Contiguous gather => masked load.
   // (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1))
   // => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer)
@@ -1683,6 +1789,11 @@ instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
   Value *Index = II.getOperand(3);
   Type *Ty = Val->getType();
 
+  // Remove when all lanes are inactive (the mask is operand 1 of the scatter)
+  if (auto II_NA = instCombineSVENoActiveUnaryErase(IC, II, 1))
+    return II_NA;
+
   // Contiguous scatter => masked store.
   // (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1))
   // => (masked.store Value (gep BasePtr IndexBase) Align Mask)
@@ -1879,6 +1990,264 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   switch (IID) {
   default:
     break;
+
+  case Intrinsic::aarch64_sve_abs:
+  case Intrinsic::aarch64_sve_bfcvt_x2:
+  case Intrinsic::aarch64_sve_cls:
+  case Intrinsic::aarch64_sve_clz:
+  case Intrinsic::aarch64_sve_cnot:
+  case Intrinsic::aarch64_sve_cnt:
+  case Intrinsic::aarch64_sve_fabs:
+  case Intrinsic::aarch64_sve_fcvt:
+  case Intrinsic::aarch64_sve_fcvt_x2:
+  case Intrinsic::aarch64_sve_fcvtn_x2:
+  case Intrinsic::aarch64_sve_fcvtzs_x2:
+  case Intrinsic::aarch64_sve_fcvtzs_x4:
+  case Intrinsic::aarch64_sve_fcvtzu_x2:
+  case Intrinsic::aarch64_sve_fcvtzu_x4:
+  case Intrinsic::aarch64_sve_fcvtzs:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+  case Intrinsic::aarch64_sve_fcvt_bf16f32:
+  case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzu:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+  case Intrinsic::aarch64_sve_fcvt_f16f32:
+  case Intrinsic::aarch64_sve_fcvt_f16f64:
+  case Intrinsic::aarch64_sve_fcvt_f32f16:
+  case Intrinsic::aarch64_sve_fcvt_f32f64:
+  case Intrinsic::aarch64_sve_fcvt_f64f16:
+  case Intrinsic::aarch64_sve_fcvt_f64f32:
+  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+  case Intrinsic::aarch64_sve_fcvtx_f32f64:
+  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+  case Intrinsic::aarch64_sve_flogb:
+  case Intrinsic::aarch64_sve_fmaxp:
+  case Intrinsic::aarch64_sve_fminp:
+  case Intrinsic::aarch64_sve_fneg:
+  case Intrinsic::aarch64_sve_frecpx:
+  case Intrinsic::aarch64_sve_frinta:
+  case Intrinsic::aarch64_sve_frinti:
+  case Intrinsic::aarch64_sve_frintm:
+  case Intrinsic::aarch64_sve_frintn:
+  case Intrinsic::aarch64_sve_frintp:
+  case Intrinsic::aarch64_sve_frintx:
+  case Intrinsic::aarch64_sve_frintz:
+  case Intrinsic::aarch64_sve_fscale:
+  case Intrinsic::aarch64_sve_fsqrt:
+  case Intrinsic::aarch64_sve_neg:
+  case Intrinsic::aarch64_sve_not:
+  case Intrinsic::aarch64_sve_rbit:
+  case Intrinsic::aarch64_sve_revb:
+  case Intrinsic::aarch64_sve_revh:
+  case Intrinsic::aarch64_sve_revw:
+  case Intrinsic::aarch64_sve_revd:
+  case Intrinsic::aarch64_sve_scvtf:
+  case Intrinsic::aarch64_sve_scvtf_f16i32:
+  case Intrinsic::aarch64_sve_scvtf_f16i64:
+  case Intrinsic::aarch64_sve_scvtf_f32i64:
+  case Intrinsic::aarch64_sve_scvtf_f64i32:
+  case Intrinsic::aarch64_sve_scvtf_x2:
+  case Intrinsic::aarch64_sve_scvtf_x4:
+  case Intrinsic::aarch64_sve_ucvtf:
+  case Intrinsic::aarch64_sve_ucvtf_f16i32:
+  case Intrinsic::aarch64_sve_ucvtf_f16i64:
+  case Intrinsic::aarch64_sve_ucvtf_f32i64:
+  case Intrinsic::aarch64_sve_ucvtf_f64i32:
+  case Intrinsic::aarch64_sve_ucvtf_x2:
+  case Intrinsic::aarch64_sve_ucvtf_x4:
+  case Intrinsic::aarch64_sve_sqabs:
+  case Intrinsic::aarch64_sve_sqneg:
+  case Intrinsic::aarch64_sve_sqrshl:
+  case Intrinsic::aarch64_sve_sqshl:
+  case Intrinsic::aarch64_sve_sqshlu:
+  case Intrinsic::aarch64_sve_sxtb:
+  case Intrinsic::aarch64_sve_sxth:
+  case Intrinsic::aarch64_sve_sxtw:
+  case Intrinsic::aarch64_sve_urecpe:
+  case Intrinsic::aarch64_sve_ursqrte:
+  case Intrinsic::aarch64_sve_uxtb:
+  case Intrinsic::aarch64_sve_uxth:
+  case Intrinsic::aarch64_sve_uxtw:
+    return instCombineSVEAllOrNoActiveUnary(IC, II);
+  case Intrinsic::aarch64_sve_brka:
+  case Intrinsic::aarch64_sve_brkb:
+  case Intrinsic::aarch64_sve_sqdecp:
+  case Intrinsic::aarch64_sve_uqdecp:
+  case Intrinsic::aarch64_sve_sqdecp_n32:
+  case Intrinsic::aarch64_sve_sqdecp_n64:
+    return instCombineSVENoActiveUnaryReplace(IC, II, true);
+  case Intrinsic::aarch64_sve_asrd:
+  case Intrinsic::aarch64_sve_clasta:
+  case Intrinsic::aarch64_sve_clastb:
+  case Intrinsic::aarch64_sve_pfirst:
+    return instCombineSVENoActiveUnaryReplace(IC, II, false);
+  case Intrinsic::aarch64_sve_addqv:
+  case Intrinsic::aarch64_sve_brka_z:
+  case Intrinsic::aarch64_sve_brkb_z:
+  case Intrinsic::aarch64_sve_brkn_z:
+  case Intrinsic::aarch64_sve_brkpa_z:
+  case Intrinsic::aarch64_sve_brkpb_z:
+  case Intrinsic::aarch64_sve_cmpeq:
+  case Intrinsic::aarch64_sve_cmpge:
+  case Intrinsic::aarch64_sve_cmpgt:
+  case Intrinsic::aarch64_sve_cmphi:
+  case Intrinsic::aarch64_sve_cmphs:
+  case Intrinsic::aarch64_sve_cmpeq_wide:
+  case Intrinsic::aarch64_sve_cmpge_wide:
+  case Intrinsic::aarch64_sve_cmpgt_wide:
+  case Intrinsic::aarch64_sve_cmphi_wide:
+  case Intrinsic::aarch64_sve_cmphs_wide:
+  case Intrinsic::aarch64_sve_cmple_wide:
+  case Intrinsic::aarch64_sve_cmplt_wide:
+  case Intrinsic::aarch64_sve_cmplo_wide:
+  case Intrinsic::aarch64_sve_cmpls_wide:
+  case Intrinsic::aarch64_sve_cntp:
+  case Intrinsic::aarch64_sve_compact:
+  case Intrinsic::aarch64_sve_eorv:
+  case Intrinsic::aarch64_sve_eorqv:
+  case Intrinsic::aarch64_sve_facge:
+  case Intrinsic::aarch64_sve_facgt:
+  case Intrinsic::aarch64_sve_faddv:
+  case Intrinsic::aarch64_sve_fcmpeq:
+  case Intrinsic::aarch64_sve_fcmpne:
+  case Intrinsic::aarch64_sve_fcmpge:
+  case Intrinsic::aarch64_sve_fcmpgt:
+  case Intrinsic::aarch64_sve_fcmpuo:
+  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ld1_gather:
+  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
+  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
+  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_ld1_pn_x2:
+  case Intrinsic::aarch64_sve_ld1_pn_x4:
+  case Intrinsic::aarch64_sve_ld1rq:
+  case Intrinsic::aarch64_sve_ld1ro:
+  case Intrinsic::aarch64_sve_ld1uwq:
+  case Intrinsic::aarch64_sve_ld1udq:
+  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ld1q_gather_index:
+  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
+  case Intrinsic::aarch64_sve_ld2_sret:
+  case Intrinsic::aarch64_sve_ld2q_sret:
+  case Intrinsic::aarch64_sve_ld3_sret:
+  case Intrinsic::aarch64_sve_ld3q_sret:
+  case Intrinsic::aarch64_sve_ld4_sret:
+  case Intrinsic::aarch64_sve_ld4q_sret:
+  case Intrinsic::aarch64_sve_ldff1:
+  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ldff1_gather:
+  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
+  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
+  case Intrinsic::aarch64_sve_ldff1_gather_index:
+  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_ldnf1:
+  case Intrinsic::aarch64_sve_ldnt1:
+  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ldnt1_gather:
+  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
+  case Intrinsic::aarch64_sve_ldnt1_gather_index:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_orqv:
+  case Intrinsic::aarch64_sve_rdffr_z:
+  case Intrinsic::aarch64_sve_saddv:
+  case Intrinsic::aarch64_sve_uaddv:
+  case Intrinsic::aarch64_sve_umaxv:
+  case Intrinsic::aarch64_sve_umaxqv:
+    return instCombineSVENoActiveUnaryZero(IC, II);
+  case Intrinsic::aarch64_sve_andqv:
+  case Intrinsic::aarch64_sve_andv:
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, ConstantInt::get(II.getType(), 1));
+  case Intrinsic::aarch64_sve_fmaxnmqv:
+  case Intrinsic::aarch64_sve_fmaxnmv:
+  case Intrinsic::aarch64_sve_fminnmqv:
+  case Intrinsic::aarch64_sve_fminnmv:
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, ConstantFP::getQNaN(II.getType()));
+  case Intrinsic::aarch64_sve_fmaxqv:
+  case Intrinsic::aarch64_sve_fmaxv:
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, ConstantFP::getInfinity(II.getType(), true));
+  case Intrinsic::aarch64_sve_fminqv:
+  case Intrinsic::aarch64_sve_fminv:
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, ConstantFP::getInfinity(II.getType()));
+  case Intrinsic::aarch64_sve_prf:
+  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_prfb_gather_index:
+  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_prfh_gather_index:
+  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_prfw_gather_index:
+  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_prfd_gather_index:
+  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
+    return instCombineSVENoActiveUnaryErase(IC, II, 0);
+  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_st1_scatter:
+  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
+  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
+  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
+  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
+  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
+  case Intrinsic::aarch64_sve_st1q_scatter_index:
+  case Intrinsic::aarch64_sve_st1dq:
+  case Intrinsic::aarch64_sve_st1wq:
+  case Intrinsic::aarch64_sve_stnt1:
+  case Intrinsic::aarch64_sve_stnt1_scatter:
+  case Intrinsic::aarch64_sve_stnt1_scatter_index:
+  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
+    return instCombineSVENoActiveUnaryErase(IC, II, 1);
+  case Intrinsic::aarch64_sve_st2:
+  case Intrinsic::aarch64_sve_st2q:
+    return instCombineSVENoActiveUnaryErase(IC, II, 2);
+  case Intrinsic::aarch64_sve_st3:
+  case Intrinsic::aarch64_sve_st3q:
+    return instCombineSVENoActiveUnaryErase(IC, II, 3);
+  case Intrinsic::aarch64_sve_st4:
+  case Intrinsic::aarch64_sve_st4q:
+    return instCombineSVENoActiveUnaryErase(IC, II, 4);
+  case Intrinsic::aarch64_sve_smaxv:
+  case Intrinsic::aarch64_sve_smaxqv: {
+    auto *MinSInt = ConstantInt::get(
+        II.getType(),
+        APInt::getSignedMinValue(II.getType()->getScalarSizeInBits()));
+    return instCombineSVENoActiveUnaryConstant(IC, II, MinSInt);
+  }
+  case Intrinsic::aarch64_sve_sminv:
+  case Intrinsic::aarch64_sve_sminqv: {
+    auto *MaxSInt = ConstantInt::get(
+        II.getType(),
+        APInt::getSignedMaxValue(II.getType()->getScalarSizeInBits()));
+    return instCombineSVENoActiveUnaryConstant(IC, II, MaxSInt);
+  }
+  case Intrinsic::aarch64_sve_uminv:
+  case Intrinsic::aarch64_sve_uminqv: {
+    auto *MaxUInt = ConstantInt::get(
+        II.getType(), APInt::getMaxValue(II.getType()->getScalarSizeInBits()));
+    return instCombineSVENoActiveUnaryConstant(IC, II, MaxUInt);
+  }
   case Intrinsic::aarch64_neon_fmaxnm:
   case Intrinsic::aarch64_neon_fminnm:
     return instCombineMaxMinNM(IC, II);
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
index 463a5f5d2cfb5c..57372c46eecf2f 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
@@ -1321,4 +1321,3294 @@ define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <v
   ret <vscale x 2 x i64> %1
 }
 
-attributes #0 = { "target-features"="+sve,+sve2" }
+define dso_local <vscale x 16 x i8> @test_svabs_m(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)  #0 {
+; CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svabs_m(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+entry:
+  %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %0
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>) #1
+
+
+define dso_local <vscale x 8 x half> @test_svabs_z(<vscale x 8 x half> %b)  #0 {
+; CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svabs_z(
+; CHECK-SAME: <vscale x 8 x half> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <vscale x 8 x half> zeroinitializer
+;
+entry:
+  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %0
+}
+
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>) #1
+
+
+define dso_local <vscale x 16 x i8> @test_svabs_m2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)  #0 {
+; CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svabs_m2(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1...
[truncated]
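As a further illustrative sketch (not taken from the patch's test file), the store-side handling works the same way: a contiguous sve.st1 whose governing predicate is all false writes nothing, so the erase helper can delete the call entirely. The function name dead_st1 is hypothetical, and the ptr argument type assumes opaque pointers; the operand order (data, predicate, pointer) matches the code in instCombineSVEST1 above.

; All lanes inactive: the masked store is a no-op and is erased by instcombine.
define void @dead_st1(<vscale x 16 x i8> %data, ptr %p) {
  call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> zeroinitializer, ptr %p)
  ret void
}

declare void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)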


github-actions bot commented Mar 26, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@mgabka mgabka self-requested a review April 2, 2024 12:24
Collaborator

@paulwalker-arm paulwalker-arm left a comment

This patch is far too big for my liking. I normally wouldn't mind, but there's ~4000 lines of tests, and grouping everything together just means we'll lose everything if one part forces a revert. Please can you break it into logically separate PRs? Say unary arithmetic/logical ops, fp ops, reductions, loads, stores? Or perhaps a PR per new instCombineSVE... function?

In this instance, given the number of tests, it would also be better to commit the tests separately so reviewers can better see the behaviour resulting from the new combines. I don't mind the tests landing as just one big PR, and given they're just tests, you can also bypass review to speed things up if you want. I do recommend breaking the testing into separate files, though, so as to make better use of test parallelism. You'll see we already do this for the CodeGen SVE intrinsic tests. There's no need to go overboard; just logical file splits like int, fp, load/store.

Comment on lines +4598 to +4602
attributes #0 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"}
attributes #1 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"}
attributes #2 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"}
attributes #3 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"}
attributes #4 = {"target-features"="+bf16,+f64mm,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2p1,+v8a,-fmv"}
Collaborator

These attributes look identical? They also look to contain features the tests don't need (e.g. neon and outline-atomics, to name but a few).
