-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (11/11) #116837
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (11/11) #116837
Conversation
2cf5ef6
to
82d5f95
Compare
…r the new all-true patterns, change undef to poison
82d5f95
to
38353aa
Compare
@llvm/pr-subscribers-backend-aarch64 Author: Momchil Velikov (momchil-velikov) ChangesSVE2.2 introduces instructions with predicated forms with zeroing of This patch adds support for emitting the zeroing forms of certain Patch is 44.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116837.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7dd6d49bf20227..18da10f65ffa56 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4325,14 +4325,14 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm FNEG_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b101, "fneg", AArch64fneg_mt>;
// SVE2p2 integer unary arithmetic, zeroing predicate
- defm SXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b000, "sxtb">;
- defm UXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b001, "uxtb">;
- defm SXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b010, "sxth">;
- defm UXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b011, "uxth">;
+ defm SXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b000, "sxtb", AArch64sxt_mt>;
+ defm UXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b001, "uxtb", AArch64uxt_mt>;
+ defm SXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b010, "sxth", AArch64sxt_mt>;
+ defm UXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b011, "uxth", AArch64uxt_mt>;
defm ABS_ZPzZ : sve_int_un_pred_arit_z< 0b110, "abs", AArch64abs_mt>;
defm NEG_ZPzZ : sve_int_un_pred_arit_z< 0b111, "neg", AArch64neg_mt>;
- def SXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1000, "sxtw", ZPR64>;
- def UXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1010, "uxtw", ZPR64>;
+ defm SXTW_ZPzZ : sve_int_un_pred_arit_d_z<0b100, "sxtw", AArch64sxt_mt>;
+ defm UXTW_ZPzZ : sve_int_un_pred_arit_d_z<0b101, "uxtw", AArch64uxt_mt>;
// SVE predicate count
defm FIRSTP_XPP : sve_int_pcount_pred_tmp<0b001, "firstp">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 0ef862fc1a27cf..46b7226399506f 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -662,6 +662,17 @@ multiclass SVE_InReg_Extend_PassthruUndef<ValueType vt, SDPatternOperator op, Va
(inst $PassThru, $Pg, $Src)>;
}
+multiclass SVE_InReg_Extend_PassthruUndefZero<ValueType vt, SDPatternOperator op, ValueType pt,
+ ValueType inreg_vt, Instruction inst> {
+ let AddedComplexity = 1 in {
+ def : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, (vt (SVEDup0Undef)))),
+ (inst $Pg, $Src)>;
+
+ def : Pat<(vt (op (pt (SVEAllActive:$Pg)), vt:$Src, inreg_vt, (vt (SVEAny)))),
+ (inst $Pg, $Src)>;
+ }
+}
+
class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
ValueType pt, ValueType it,
ComplexPattern cast, Instruction inst>
@@ -4899,10 +4910,14 @@ multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i8, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
-multiclass sve_int_un_pred_arit_h_z<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_h_z<bits<3> opc, string asm, SDPatternOperator op> {
def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b0 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+ defm : SVE_InReg_Extend_PassthruUndefZero<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_InReg_Extend_PassthruUndefZero<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_InReg_Extend_PassthruUndefZero<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_w<bits<3> opc, string asm,
@@ -4922,9 +4937,12 @@ multiclass sve_int_un_pred_arit_w<bits<3> opc, string asm,
defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i16, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
-multiclass sve_int_un_pred_arit_w_z<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_w_z<bits<3> opc, string asm, SDPatternOperator op> {
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+ defm : SVE_InReg_Extend_PassthruUndefZero<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_InReg_Extend_PassthruUndefZero<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_d<bits<3> opc, string asm,
@@ -4939,6 +4957,12 @@ multiclass sve_int_un_pred_arit_d<bits<3> opc, string asm,
defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i32, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
+multiclass sve_int_un_pred_arit_d_z<bits<3> opc, string asm, SDPatternOperator op> {
+ def _D : sve_int_un_pred_arit_z<0b11, {opc, 0b0}, asm, ZPR64>;
+
+ defm : SVE_InReg_Extend_PassthruUndefZero<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
+}
+
multiclass sve_int_un_pred_arit_bitwise<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>,
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-ext.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-ext.ll
new file mode 100644
index 00000000000000..b29805c2b8f05a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-ext.ll
@@ -0,0 +1,1028 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 8 x i16> @test_svextb_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svextb_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svextb_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svextb_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svextb_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svextb_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: sxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svextb_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svextb_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svextb_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svextb_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svextb_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svextb_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: sxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svextb_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svextb_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svextb_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svextb_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svextb_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svextb_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxtb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_svextb_u16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svextb_u16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uxtb z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svextb_u16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svextb_u16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svextb_u16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svextb_u16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: uxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svextb_u32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svextb_u32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svextb_u32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svextb_u32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svextb_u32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svextb_u32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: uxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svextb_u64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svextb_u64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uxtb z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svextb_u64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svextb_u64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svextb_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svextb_u64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: uxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svextb_u64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxtb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @test_svexth_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svexth_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svexth_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxth z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svexth_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svexth_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxth z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svexth_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxth z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svexth_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svexth_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: sxth z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svexth_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxth z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svexth_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svexth_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svexth_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxth z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svexth_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svexth_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxth z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svexth_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxth z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svexth_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svexth_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: sxth z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svexth_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: sxth z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @test_svexth_u32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svexth_u32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uxth z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svexth_u32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: uxth z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.a...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/12507 Here is the relevant piece of the build log for the reference
|
SVE2.2 introduces instructions with predicated forms with zeroing of
the inactive lanes. This allows in some cases to save a
movprfx
ora
mov
instruction when emitting code for_x
or_z
variants ofintrinsics.
This patch adds support for emitting the zeroing forms of certain
SXTB
,UXTB
,SXTH
,UXTH
,SXTW
, andUXTW
instructions.