Skip to content

Commit f95f10c

Browse files
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (9/11) (#116835)
SVE2.2 introduces predicated instruction forms that zero the inactive lanes. In some cases this saves a `movprfx` or a `mov` instruction when emitting code for the `_x` or `_z` variants of intrinsics. This patch adds support for emitting the zeroing forms of certain `URECPE`, `URSQRTE`, `SQABS` and `SQNEG` instructions.
1 parent e492083 commit f95f10c

File tree

3 files changed

+874
-5
lines changed

3 files changed

+874
-5
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4283,10 +4283,10 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
42834283
defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb", int_aarch64_sve_flogb>;
42844284

42854285
// SVE2 integer unary operations, zeroing predicate
4286-
def URECPE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b00, "urecpe", ZPR32>;
4287-
def URSQRTE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b01, "ursqrte", ZPR32>;
4288-
defm SQABS_ZPzZ : sve2_int_un_pred_arit_z<0b10, "sqabs">;
4289-
defm SQNEG_ZPzZ : sve2_int_un_pred_arit_z<0b11, "sqneg">;
4286+
defm URECPE_ZPzZ : sve2_int_un_pred_arit_z_S<0b00, "urecpe", int_aarch64_sve_urecpe>;
4287+
defm URSQRTE_ZPzZ : sve2_int_un_pred_arit_z_S<0b01, "ursqrte", int_aarch64_sve_ursqrte>;
4288+
defm SQABS_ZPzZ : sve2_int_un_pred_arit_z< 0b10, "sqabs", int_aarch64_sve_sqabs>;
4289+
defm SQNEG_ZPzZ : sve2_int_un_pred_arit_z< 0b11, "sqneg", int_aarch64_sve_sqneg>;
42904290

42914291
// Floating point round to integral fp value in integer size range
42924292
// Merging

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4200,11 +4200,22 @@ multiclass sve2_int_un_pred_arit<bits<2> opc, string asm, SDPatternOperator op>
42004200
defm : SVE_3_Op_Undef_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
42014201
}
42024202

4203-
multiclass sve2_int_un_pred_arit_z<bits<2> opc, string asm> {
4203+
// Word-only variant of the zeroing-predicate integer unary multiclass.
// Defines a single `_S` instruction from the same-named 4-argument class
// `sve2_int_un_pred_arit_z` (first template argument fixed to 0b10, register
// class ZPR32) and one selection pattern binding `op` to that instruction for
// nxv4i32 data with an nxv4i1 predicate.
//   opc - 2-bit opcode field forwarded to the instruction class.
//   asm - assembly mnemonic forwarded to the instruction class.
//   op  - pattern operator (an intrinsic at the use sites in this change) that
//         the pattern is matched against.
// NOTE(review): SVE_3_Op_UndefZero_Pat is defined outside this view; presumably
// it matches `op` when its passthru operand is undef or zero so the zeroing
// form can be selected directly -- confirm against its definition.
multiclass sve2_int_un_pred_arit_z_S<bits<2> opc, string asm, SDPatternOperator op> {
4204+
def _S : sve2_int_un_pred_arit_z<0b10, opc, asm, ZPR32>;
4205+
4206+
defm : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
4207+
}
4208+
4209+
// All-element-width variant of the zeroing-predicate integer unary multiclass.
// Defines `_B`, `_H`, `_S` and `_D` instructions from the same-named
// 4-argument class `sve2_int_un_pred_arit_z` (first template argument
// 0b00/0b01/0b10/0b11 with register classes ZPR8/ZPR16/ZPR32/ZPR64
// respectively), then one selection pattern per width binding `op` to the
// matching instruction for nxv16i8, nxv8i16, nxv4i32 and nxv2i64 data with the
// corresponding nxv16i1/nxv8i1/nxv4i1/nxv2i1 predicate type.
//   opc - 2-bit opcode field forwarded to every instruction definition.
//   asm - assembly mnemonic forwarded to every instruction definition.
//   op  - pattern operator (an intrinsic at the use sites in this change) that
//         the patterns are matched against.
// NOTE(review): SVE_3_Op_UndefZero_Pat is defined outside this view; presumably
// it matches `op` when its passthru operand is undef or zero so the zeroing
// form can be selected directly -- confirm against its definition.
multiclass sve2_int_un_pred_arit_z<bits<2> opc, string asm, SDPatternOperator op> {
42044210
def _B : sve2_int_un_pred_arit_z<0b00, opc, asm, ZPR8>;
42054211
def _H : sve2_int_un_pred_arit_z<0b01, opc, asm, ZPR16>;
42064212
def _S : sve2_int_un_pred_arit_z<0b10, opc, asm, ZPR32>;
42074213
def _D : sve2_int_un_pred_arit_z<0b11, opc, asm, ZPR64>;
4214+
4215+
defm : SVE_3_Op_UndefZero_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
4216+
defm : SVE_3_Op_UndefZero_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
4217+
defm : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
4218+
defm : SVE_3_Op_UndefZero_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
42084219
}
42094220

42104221
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)