Skip to content

Commit 2b3266c

Browse files
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (1/11) (#116259)
SVE2.2 introduces predicated instruction forms that zero the inactive lanes. In some cases this saves a `movprfx` or a `mov` instruction when emitting code for the `_x` or `_z` variants of intrinsics. This patch adds support for emitting the zeroing forms of the `ABS`, `NEG`, `FABS`, and `FNEG` instructions.
1 parent f4974e0 commit 2b3266c

File tree

4 files changed

+719
-23
lines changed

4 files changed

+719
-23
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,9 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
381381

382382
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
383383

384+
def UseUnaryUndefPseudos
385+
: Predicate<"!(Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2()))">;
386+
384387
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
385388
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
386389
SDTCisInt<1>]>>;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,14 @@ let Predicates = [HasSVEorSME] in {
675675
defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>;
676676
defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>;
677677

678+
let Predicates = [HasSVEorSME, UseUnaryUndefPseudos] in {
679+
defm FABS_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fabs_mt>;
680+
defm FNEG_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fneg_mt>;
681+
682+
defm ABS_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64abs_mt>;
683+
defm NEG_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64neg_mt>;
684+
}
685+
678686
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
679687
// No dedicated instruction, so just clear the sign bit.
680688
def : Pat<(VT (fabs VT:$op)),
@@ -4321,16 +4329,16 @@ let Predicates = [HasSVE2p2orSME2p2] in {
43214329
defm NOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b110, "not">;
43224330

43234331
// floating point
4324-
defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs">;
4325-
defm FNEG_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b101, "fneg">;
4332+
defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs", AArch64fabs_mt>;
4333+
defm FNEG_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b101, "fneg", AArch64fneg_mt>;
43264334

43274335
// SVE2p2 integer unary arithmetic, zeroing predicate
43284336
defm SXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b000, "sxtb">;
43294337
defm UXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b001, "uxtb">;
43304338
defm SXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b010, "sxth">;
43314339
defm UXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b011, "uxth">;
4332-
defm ABS_ZPzZ : sve_int_un_pred_arit_z< 0b110, "abs">;
4333-
defm NEG_ZPzZ : sve_int_un_pred_arit_z< 0b111, "neg">;
4340+
defm ABS_ZPzZ : sve_int_un_pred_arit_z< 0b110, "abs", AArch64abs_mt>;
4341+
defm NEG_ZPzZ : sve_int_un_pred_arit_z< 0b111, "neg", AArch64neg_mt>;
43344342
def SXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1000, "sxtw", ZPR64>;
43354343
def UXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1010, "uxtw", ZPR64>;
43364344

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,8 @@ let Predicates = [HasSVEorSME] in {
482482
//===----------------------------------------------------------------------===//
483483
// SVE pattern match helpers.
484484
//===----------------------------------------------------------------------===//
485+
def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
486+
def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
485487

486488
class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
487489
Instruction inst>
@@ -502,6 +504,11 @@ multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, Value
502504
(inst $Op3, $Op1, $Op2)>;
503505
}
504506

507+
class SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
508+
ValueType vts, Instruction inst>
509+
: Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
510+
(inst $Op1, $Op2)>;
511+
505512
// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
506513
// type of rounding. This is matched by timm0_1 in pattern below and ignored.
507514
class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
@@ -517,8 +524,6 @@ multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op,
517524
(inst $Op3, $Op1, $Op2)>;
518525
}
519526

520-
def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
521-
522527
class SVE_1_Op_PassthruZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
523528
ValueType vt2, Instruction inst>
524529
: Pat<(vtd (op (vtd (SVEDup0)), vt1:$Op1, vt2:$Op2)),
@@ -606,8 +611,6 @@ class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
606611
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
607612
(inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
608613

609-
def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
610-
611614
let AddedComplexity = 1 in {
612615
class SVE_3_Op_Pat_SelZero<ValueType vtd, SDPatternOperator op, ValueType vt1,
613616
ValueType vt2, ValueType vt3, Instruction inst>
@@ -4820,23 +4823,18 @@ multiclass sve_int_un_pred_arit<bits<3> opc, string asm,
48204823
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
48214824
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
48224825
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
4823-
4824-
def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
4825-
def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
4826-
def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
4827-
def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
4828-
4829-
defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
4830-
defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
4831-
defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
4832-
defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
48334826
}
48344827

4835-
multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm> {
4828+
multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op> {
48364829
def _B : sve_int_un_pred_arit_z<0b00, { opc, 0b0 }, asm, ZPR8>;
48374830
def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b0 }, asm, ZPR16>;
48384831
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
48394832
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
4833+
4834+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
4835+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
4836+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
4837+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
48404838
}
48414839

48424840
multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
@@ -4950,7 +4948,22 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
49504948
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
49514949
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
49524950
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
4951+
}
49534952

4953+
multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternOperator op> {
4954+
def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>;
4955+
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
4956+
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
4957+
4958+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
4959+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
4960+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
4961+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
4962+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
4963+
def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
4964+
}
4965+
4966+
multiclass sve_fp_un_pred_arit_hsd<SDPatternOperator op> {
49544967
def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
49554968
def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
49564969
def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
@@ -4963,10 +4976,16 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
49634976
defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
49644977
}
49654978

4966-
multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm> {
4967-
def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>;
4968-
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
4969-
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
4979+
multiclass sve_int_un_pred_arit_bhsd<SDPatternOperator op> {
4980+
def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
4981+
def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
4982+
def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
4983+
def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
4984+
4985+
defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
4986+
defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
4987+
defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
4988+
defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
49704989
}
49714990

49724991
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)