[AArch64] replace SVE intrinsics with no active lanes with constant #107266

Lukacma · 2024-09-04T16:23:08Z

This patch extends #73964: it folds SVE intrinsics to the appropriate non-zero constant when the governing predicate is all-false (i.e. there are no active lanes).

Lukacma · 2024-09-04T16:23:37Z

This patch is one of several split out of #86651, as requested during the review of that patch.

llvmbot · 2024-09-04T16:23:39Z

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-transforms

Author: None (Lukacma)

Changes

This patch extends #73964: it folds SVE intrinsics to the appropriate non-zero constant when the governing predicate is all-false (i.e. there are no active lanes).

Full diff: https://github.com/llvm/llvm-project/pull/107266.diff

2 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+51)
(added) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-constant.ll (+158)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a782c9c4351237..beb80889909677 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1073,6 +1073,18 @@ static bool isAllActivePredicate(Value *Pred) {
                          m_ConstantInt<AArch64SVEPredPattern::all>()));
 }
 
+// If the predicate (operand 0) of II has no active lanes, replace all uses of
+// II with NewVal and erase II; otherwise return std::nullopt (no change made).
+static std::optional<Instruction *>
+instCombineSVENoActiveLanesConstant(InstCombiner &IC, IntrinsicInst &II,
+                                    Constant *NewVal) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, NewVal);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
 // Erase unary operation where predicate has all inactive lanes
 static std::optional<Instruction *>
 instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
@@ -2131,6 +2143,45 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_st4:
   case Intrinsic::aarch64_sve_st4q:
     return instCombineSVENoActiveUnaryErase(IC, II, 4);
+  case Intrinsic::aarch64_sve_andqv:
+  case Intrinsic::aarch64_sve_andv:
+    return instCombineSVENoActiveLanesConstant(
+        IC, II, ConstantInt::getAllOnesValue(II.getType()));
+  case Intrinsic::aarch64_sve_fmaxnmqv:
+  case Intrinsic::aarch64_sve_fmaxnmv:
+  case Intrinsic::aarch64_sve_fminnmqv:
+  case Intrinsic::aarch64_sve_fminnmv:
+    return instCombineSVENoActiveLanesConstant(
+        IC, II, ConstantFP::getNaN(II.getType()));
+  case Intrinsic::aarch64_sve_fmaxqv:
+  case Intrinsic::aarch64_sve_fmaxv:
+    return instCombineSVENoActiveLanesConstant(
+        IC, II, ConstantFP::getInfinity(II.getType(), true));
+  case Intrinsic::aarch64_sve_fminqv:
+  case Intrinsic::aarch64_sve_fminv:
+    return instCombineSVENoActiveLanesConstant(
+        IC, II, ConstantFP::getInfinity(II.getType()));
+  case Intrinsic::aarch64_sve_smaxv:
+  case Intrinsic::aarch64_sve_smaxqv: {
+    auto RetTy = II.getType();
+    auto *MinSInt = ConstantInt::get(
+        RetTy, APInt::getSignedMinValue(RetTy->getScalarSizeInBits()));
+    return instCombineSVENoActiveLanesConstant(IC, II, MinSInt);
+  }
+  case Intrinsic::aarch64_sve_sminv:
+  case Intrinsic::aarch64_sve_sminqv: {
+    auto RetTy = II.getType();
+    auto *MaxSInt = ConstantInt::get(
+        RetTy, APInt::getSignedMaxValue(RetTy->getScalarSizeInBits()));
+    return instCombineSVENoActiveLanesConstant(IC, II, MaxSInt);
+  }
+  case Intrinsic::aarch64_sve_uminv:
+  case Intrinsic::aarch64_sve_uminqv: {
+    auto RetTy = II.getType();
+    auto *MaxUInt = ConstantInt::get(
+        RetTy, APInt::getMaxValue(RetTy->getScalarSizeInBits()));
+    return instCombineSVENoActiveLanesConstant(IC, II, MaxUInt);
+  }
   case Intrinsic::aarch64_sve_ld1_gather:
   case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
   case Intrinsic::aarch64_sve_ld1_gather_sxtw:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-constant.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-constant.ll
new file mode 100644
index 00000000000000..9755582845999c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-constant.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+;RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define <16 x i8> @andqv_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: define <16 x i8> @andqv_i8(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
+  %res = call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define i8 @andv_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: define i8 @andv_i8(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    ret i8 -1
+;
+  %res = call i8 @llvm.aarch64.sve.andv.v16i8.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a);
+  ret i8 %res
+}
+
+define <4 x float> @fmaxnmqv_f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define <4 x float> @fmaxnmqv_f32(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    ret <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
+;
+  %res = call <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32(<vscale x 4 x i1> zeroinitializer,
+  <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define float @fmaxnmv_f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define float @fmaxnmv_f32(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    ret float 0x7FF8000000000000
+;
+  %res = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1> zeroinitializer,
+  <vscale x 4 x float> %a)
+  ret float %res
+}
+
+define <4 x float> @fminnmqv_f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define <4 x float> @fminnmqv_f32(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    ret <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
+;
+  %res = call <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32(<vscale x 4 x i1> zeroinitializer,
+  <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define float @fminnmv_f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define float @fminnmv_f32(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    ret float 0x7FF8000000000000
+;
+  %res = call float @llvm.aarch64.sve.fminnmv.nxv4f32(<vscale x 4 x i1> zeroinitializer,
+  <vscale x 4 x float> %a)
+  ret float %res
+}
+
+define <2 x double> @fmaxqv_f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: define <2 x double> @fmaxqv_f64(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]]) {
+; CHECK-NEXT:    ret <2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>
+;
+  %res = call <2 x double> @llvm.aarch64.sve.fmaxqv.v2f64.nxv2f64(<vscale x 2 x i1> zeroinitializer,
+  <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+define double @fmaxv_f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: define double @fmaxv_f64(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]]) {
+; CHECK-NEXT:    ret double 0xFFF0000000000000
+;
+  %res = call double @llvm.aarch64.sve.fmaxv.nxv2f64(<vscale x 2 x i1> zeroinitializer,
+  <vscale x 2 x double> %a)
+  ret double %res
+}
+
+define <2 x double> @fminqv_f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: define <2 x double> @fminqv_f64(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]]) {
+; CHECK-NEXT:    ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
+;
+  %res = call <2 x double> @llvm.aarch64.sve.fminqv.v2f64.nxv2f64(<vscale x 2 x i1> zeroinitializer,
+  <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+define double @fminv_f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: define double @fminv_f64(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]]) {
+; CHECK-NEXT:    ret double 0x7FF0000000000000
+;
+  %res = call double @llvm.aarch64.sve.fminv.nxv2f64(<vscale x 2 x i1> zeroinitializer,
+  <vscale x 2 x double> %a)
+  ret double %res
+}
+
+define i16 @smaxv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define i16 @smaxv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret i16 -32768
+;
+  %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> zeroinitializer,
+  <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define <8 x i16> @smaxqv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define <8 x i16> @smaxqv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret <8 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+;
+  %res = call <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define i16 @sminv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define i16 @sminv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret i16 32767
+;
+  %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> zeroinitializer,
+  <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define <8 x i16> @sminqv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define <8 x i16> @sminqv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+;
+  %res = call <8 x i16> @llvm.aarch64.sve.sminqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define i16 @uminv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define i16 @uminv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret i16 -1
+;
+  %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> zeroinitializer,
+  <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define <8 x i16> @uminqv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define <8 x i16> @uminqv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+;
+  %res = call <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}

[AArch64] replace SVE intrinsics with no active lanes with constant

cdb6dec

llvmbot added backend:AArch64 llvm:transforms labels Sep 4, 2024

Lukacma requested review from CarolineConcatto and SpencerAbson September 4, 2024 16:23

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64] replace SVE intrinsics with no active lanes with constant #107266

[AArch64] replace SVE intrinsics with no active lanes with constant #107266

Uh oh!

Lukacma commented Sep 4, 2024

Uh oh!

Lukacma commented Sep 4, 2024

Uh oh!

llvmbot commented Sep 4, 2024 •

edited

Loading

Uh oh!

Uh oh!

[AArch64] replace SVE intrinsics with no active lanes with constant #107266

Are you sure you want to change the base?

[AArch64] replace SVE intrinsics with no active lanes with constant #107266

Uh oh!

Conversation

Lukacma commented Sep 4, 2024

Uh oh!

Lukacma commented Sep 4, 2024

Uh oh!

llvmbot commented Sep 4, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Sep 4, 2024 •

edited

Loading