[AArch64] optimise SVE cvt intrinsics with no active lanes #104809
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-aarch64

Author: None (Lukacma)

Changes: This patch extends #73964 and optimises SVE cvt intrinsics away when the predicate is zero.

Patch is 20.02 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/104809.diff

2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a782c9c4351237..c546dfb1453492 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1073,6 +1073,32 @@ static bool isAllActivePredicate(Value *Pred) {
m_ConstantInt<AArch64SVEPredPattern::all>()));
}
+// Simplify unary operation where predicate has all inactive lanes by replacing
+// instruction with its operand
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II,
+ bool hasInactiveVector) {
+ int PredOperand = hasInactiveVector ? 1 : 0;
+ int ReplaceOperand = hasInactiveVector ? 0 : 1;
+ if (match(II.getOperand(PredOperand), m_ZeroInt())) {
+ IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
+ return IC.eraseInstFromFunction(II);
+ }
+ return std::nullopt;
+}
+
+// Simplify unary operation where predicate has all inactive lanes or try to
+// replace with _x form when all lanes are active
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
+ if (isAllActivePredicate(II.getOperand(1)) &&
+ !isa<llvm::UndefValue>(II.getOperand(0))) {
+ Value *Undef = llvm::UndefValue::get(II.getType());
+ return IC.replaceOperand(II, 0, Undef);
+ }
+ return instCombineSVENoActiveUnaryReplace(IC, II, true);
+}
+
// Erase unary operation where predicate has all inactive lanes
static std::optional<Instruction *>
instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
@@ -2104,7 +2130,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
switch (IID) {
default:
break;
-
+ case Intrinsic::aarch64_sve_fcvt_bf16f32:
+ case Intrinsic::aarch64_sve_fcvt_f16f32:
+ case Intrinsic::aarch64_sve_fcvt_f16f64:
+ case Intrinsic::aarch64_sve_fcvt_f32f16:
+ case Intrinsic::aarch64_sve_fcvt_f32f64:
+ case Intrinsic::aarch64_sve_fcvt_f64f16:
+ case Intrinsic::aarch64_sve_fcvt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+ case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+ case Intrinsic::aarch64_sve_fcvtx_f32f64:
+ case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+ case Intrinsic::aarch64_sve_fcvtzs:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+ case Intrinsic::aarch64_sve_fcvtzu:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+ case Intrinsic::aarch64_sve_scvtf:
+ case Intrinsic::aarch64_sve_scvtf_f16i32:
+ case Intrinsic::aarch64_sve_scvtf_f16i64:
+ case Intrinsic::aarch64_sve_scvtf_f32i64:
+ case Intrinsic::aarch64_sve_scvtf_f64i32:
+ case Intrinsic::aarch64_sve_ucvtf:
+ case Intrinsic::aarch64_sve_ucvtf_f16i32:
+ case Intrinsic::aarch64_sve_ucvtf_f16i64:
+ case Intrinsic::aarch64_sve_ucvtf_f32i64:
+ case Intrinsic::aarch64_sve_ucvtf_f64i32:
+ return instCombineSVEAllOrNoActiveUnary(IC, II);
case Intrinsic::aarch64_sve_st1_scatter:
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
new file mode 100644
index 00000000000000..eb01ff5dacab56
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
@@ -0,0 +1,309 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x bfloat> [[A]]
+;
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f16(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x bfloat> [[A]]
+;
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtx_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzs(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x i16> [[A]]
+;
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f16(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f64(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f16(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f32(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzu(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x i16> [[A]]
+;
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f16(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f64(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f16(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f32(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @test_scvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i64(<vscale x 8 x half> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_scvtf_f32_i64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_scvtf_f64_i32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i64(<vscale x 8 x half> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_ucvtf_f32_i64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_ucvtf_f64_i32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+ ret <vscale x 2 x...
[truncated]
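For reference, here is a minimal hand-written sketch of the no-active-lanes fold exercised by the tests above (not taken from the patch; it reuses the fcvt.f16f32 intrinsic and operand order from the test file):

; Before: the predicate is all-false, so no lanes of %b are converted and
; the passthru %a flows through unchanged.
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
ret <vscale x 8 x half> %out

; After instcombine: the call is erased and its uses are replaced with the passthru.
ret <vscale x 8 x half> %a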
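The all-active path in instCombineSVEAllOrNoActiveUnary is worth a sketch too: when the governing predicate is a ptrue with the all pattern, the passthru can never be read, so the combine replaces it with undef, allowing later folds to select the unpredicated (_x) form. Again hand-written for illustration:

; Before: %pg covers every lane, so the passthru %a is dead.
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)

; After instcombine: only the dead passthru operand changes, to undef.
%out2 = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)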
This patch comes from splitting #86651 into multiple patches, as requested in the review of that patch.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/4512