[AArch64][SVE] Convert SRSHL to LSL when fed from an ABS intrinsic

brads55 · brads55 · commit 5f4541fefbfc · 2022-05-19T14:07:59.000Z
Differential Revision: https://reviews.llvm.org/D125233
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1229,6 +1229,42 @@ static Optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
   return None;
 }
 
+static Optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
+                                                   IntrinsicInst &II) {
+  // Replace SRSHL with the simpler LSL intrinsic when its input is an ABS.
+  Value *ShiftPred = II.getOperand(0);
+  Value *Input = II.getOperand(1);
+  Value *Amount = II.getOperand(2);
+
+  // Input must be an SQABS/ABS call; bind its merge value and predicate.
+  Value *AbsMerge, *AbsPg;
+  bool FedByAbs =
+      match(Input, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
+                       m_Value(AbsMerge), m_Value(AbsPg), m_Value())) ||
+      match(Input, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
+                       m_Value(AbsMerge), m_Value(AbsPg), m_Value()));
+  if (!FedByAbs)
+    return None;
+
+  // The fold additionally needs at least one of the following to hold:
+  // * the ABS merge value is undef or non-negative,
+  // * the ABS predicate is the very same value as the SRSHL predicate,
+  // * the ABS predicate is all active.
+  bool MergeIsHarmless =
+      isa<UndefValue>(AbsMerge) || match(AbsMerge, m_NonNegative());
+  if (!MergeIsHarmless && AbsPg != ShiftPred && !isAllActivePredicate(AbsPg))
+    return None;
+
+  // With a possibly negative shift, SRSHL's rounding cannot be ignored.
+  if (!match(Amount, m_NonNegative()))
+    return None;
+
+  IRBuilder<> Builder(&II);
+  auto *LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()},
+                                      {ShiftPred, Input, Amount});
+  return IC.replaceInstUsesWith(II, LSL);
+}
+
 Optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -1296,6 +1332,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVESDIV(IC, II);
   case Intrinsic::aarch64_sve_sel:
     return instCombineSVESel(IC, II);
+  case Intrinsic::aarch64_sve_srshl:
+    return instCombineSVESrshl(IC, II);
   }
 
   return None;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 8 x i16> @srshl_abs_undef_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_undef_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+; Positive: ABS merges into undef and the shift is a splat of 2, so SRSHL is expected to become LSL.
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_zero_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_zero_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+; Positive: ABS merges into zeroinitializer (a non-negative merge value), so SRSHL is expected to become LSL.
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_positive_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+; Positive: ABS merges into a splat of 2 (a non-negative merge value), so SRSHL is expected to become LSL.
+  %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %absmerge, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_all_active_pred(
+; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+; Positive: ABS is predicated on ptrue(i32 31), the all-active case per the test name; merge %b is unknown yet LSL is expected.
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_same_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: @srshl_abs_same_pred(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+; Positive: ABS and SRSHL share the predicate %pg; merge %b is unknown yet LSL is expected.
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_sqabs(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_sqabs(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+; Positive: the shifted value comes from SQABS (undef merge) rather than ABS; SRSHL is still expected to become LSL.
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_negative_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_negative_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+; Negative: ABS merges into a splat of -1 under a predicate different from the SRSHL one, so SRSHL must stay.
+  %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %absmerge, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_nonconst_merge(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_nonconst_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+; Negative: the merge value %b is an unknown argument and the two predicates differ, so SRSHL must stay.
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_not_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_not_all_active_pred(
+; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8)
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+; Negative: ABS is predicated on ptrue(i32 8), which this test treats as not all-active, with unknown merge %b; SRSHL must stay.
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8)
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_diff_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_diff_pred(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+; Negative: ABS uses %pg, SRSHL uses %pg2, merge %b is unknown; SRSHL must stay. NOTE(review): IR is identical to @srshl_abs_nonconst_merge.
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_negative_shift(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_negative_shift(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+; Negative: the ABS merge is undef but the shift amount is a splat of -2, so SRSHL must stay.
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+
+attributes #0 = { "target-features"="+sve,+sve2" }