Skip to content

Commit 7483eb6

Browse files
[AArch64][SVE] Implement shift intrinsics
Summary:
Adds the following intrinsics:
- asr & asrd
- insr
- lsl & lsr

This patch also adds a new AArch64ISD node (INSR) to represent the int_aarch64_sve_insr intrinsic.

Reviewers: huntergr, sdesmalen, dancgr, mgudim, rengolin, efriedma

Reviewed By: sdesmalen

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70437
1 parent 14f7673 commit 7483eb6

File tree

7 files changed

+482
-33
lines changed

7 files changed

+482
-33
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -790,6 +790,21 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
790790
llvm_i32_ty],
791791
[IntrNoMem]>;
792792

793+
class AdvSIMD_Pred2VectorArg_Intrinsic
794+
: Intrinsic<[llvm_anyvector_ty],
795+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
796+
LLVMMatchType<0>,
797+
LLVMMatchType<0>],
798+
[IntrNoMem]>;
799+
800+
class AdvSIMD_Pred3VectorArg_Intrinsic
801+
: Intrinsic<[llvm_anyvector_ty],
802+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
803+
LLVMMatchType<0>,
804+
LLVMMatchType<0>,
805+
LLVMMatchType<0>],
806+
[IntrNoMem]>;
807+
793808
class AdvSIMD_SVE_Compare_Intrinsic
794809
: Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
795810
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -817,6 +832,20 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
817832
llvm_anyvector_ty],
818833
[IntrNoMem]>;
819834

835+
class AdvSIMD_SVE_ShiftByImm_Intrinsic
836+
: Intrinsic<[llvm_anyvector_ty],
837+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
838+
LLVMMatchType<0>,
839+
llvm_i32_ty],
840+
[IntrNoMem]>;
841+
842+
class AdvSIMD_SVE_ShiftWide_Intrinsic
843+
: Intrinsic<[llvm_anyvector_ty],
844+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
845+
LLVMMatchType<0>,
846+
llvm_nxv2i64_ty],
847+
[IntrNoMem]>;
848+
820849
class AdvSIMD_SVE_Unpack_Intrinsic
821850
: Intrinsic<[llvm_anyvector_ty],
822851
[LLVMSubdivide2VectorType<0>],
@@ -867,6 +896,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
867896
llvm_anyvector_ty],
868897
[IntrNoMem]>;
869898

899+
class AdvSIMD_SVE_INSR_Intrinsic
900+
: Intrinsic<[llvm_anyvector_ty],
901+
[LLVMMatchType<0>,
902+
LLVMVectorElementType<0>],
903+
[IntrNoMem]>;
904+
870905
class AdvSIMD_SVE_PUNPKHI_Intrinsic
871906
: Intrinsic<[LLVMHalfElementsVectorType<0>],
872907
[llvm_anyvector_ty],
@@ -919,18 +954,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
919954

920955
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
921956

922-
923-
class AdvSIMD_Pred2VectorArg_Intrinsic
924-
: Intrinsic<[llvm_anyvector_ty],
925-
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>],
926-
[IntrNoMem]>;
927-
928-
class AdvSIMD_Pred3VectorArg_Intrinsic
929-
: Intrinsic<[llvm_anyvector_ty],
930-
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
931-
[IntrNoMem]>;
932-
933-
934957
//
935958
// Integer arithmetic
936959
//
@@ -975,6 +998,17 @@ def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
975998
def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
976999
def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
9771000

1001+
// Shifts
1002+
1003+
def int_aarch64_sve_asr : AdvSIMD_Pred2VectorArg_Intrinsic;
1004+
def int_aarch64_sve_asr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
1005+
def int_aarch64_sve_asrd : AdvSIMD_SVE_ShiftByImm_Intrinsic;
1006+
def int_aarch64_sve_insr : AdvSIMD_SVE_INSR_Intrinsic;
1007+
def int_aarch64_sve_lsl : AdvSIMD_Pred2VectorArg_Intrinsic;
1008+
def int_aarch64_sve_lsl_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
1009+
def int_aarch64_sve_lsr : AdvSIMD_Pred2VectorArg_Intrinsic;
1010+
def int_aarch64_sve_lsr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
1011+
9781012
//
9791013
// Counting bits
9801014
//

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
828828
if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1)
829829
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
830830
}
831+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
832+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
831833
}
832834

833835
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
@@ -1333,6 +1335,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
13331335
case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
13341336
case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
13351337
case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
1338+
case AArch64ISD::INSR: return "AArch64ISD::INSR";
13361339
}
13371340
return nullptr;
13381341
}
@@ -2884,6 +2887,16 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
28842887
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
28852888
Op.getOperand(1));
28862889

2890+
case Intrinsic::aarch64_sve_insr: {
2891+
SDValue Scalar = Op.getOperand(2);
2892+
EVT ScalarTy = Scalar.getValueType();
2893+
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
2894+
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
2895+
2896+
return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
2897+
Op.getOperand(1), Scalar);
2898+
}
2899+
28872900
case Intrinsic::localaddress: {
28882901
const auto &MF = DAG.getMachineFunction();
28892902
const auto *RegInfo = Subtarget->getRegisterInfo();

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ enum NodeType : unsigned {
196196
UUNPKHI,
197197
UUNPKLO,
198198

199+
INSR,
200+
199201
// NEON Load/Store with post-increment base updates
200202
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
201203
LD3post,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ def SDT_AArch64FCmp : SDTypeProfile<0, 2,
214214
SDTCisSameAs<0, 1>]>;
215215
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
216216
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
217+
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
217218
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
218219
SDTCisSameAs<0, 1>,
219220
SDTCisSameAs<0, 2>]>;
@@ -401,6 +402,8 @@ def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
401402
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
402403
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
403404

405+
def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
406+
404407
def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
405408
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
406409
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ let Predicates = [HasSVE] in {
199199

200200
defm SPLICE_ZPZ : sve_int_perm_splice<"splice">;
201201
defm COMPACT_ZPZ : sve_int_perm_compact<"compact">;
202-
defm INSR_ZR : sve_int_perm_insrs<"insr">;
203-
defm INSR_ZV : sve_int_perm_insrv<"insr">;
202+
defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
203+
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
204204
def EXT_ZZI : sve_int_perm_extract_i<"ext">;
205205

206206
defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">;
@@ -876,18 +876,18 @@ let Predicates = [HasSVE] in {
876876
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
877877
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
878878
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
879-
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">;
880-
881-
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">;
882-
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">;
883-
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">;
884-
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">;
885-
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">;
886-
defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">;
887-
888-
defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">;
889-
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
890-
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
879+
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", int_aarch64_sve_asrd>;
880+
881+
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", int_aarch64_sve_asr>;
882+
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", int_aarch64_sve_lsr>;
883+
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", int_aarch64_sve_lsl>;
884+
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", null_frag>;
885+
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
886+
defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;
887+
888+
defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
889+
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
890+
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
891891

892892
defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv16i1, nxv4f32, ElementSizeS>;
893893
defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv16i1, nxv8f16, ElementSizeS>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,12 @@ class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
304304
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
305305
(inst $Op1, $Op2, $Op3, $Op4)>;
306306

307+
class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
308+
ValueType vt2, ValueType vt3, Operand ImmTy,
309+
Instruction inst>
310+
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))),
311+
(inst $Op1, $Op2, ImmTy:$Op3)>;
312+
307313
def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
308314

309315
//===----------------------------------------------------------------------===//
@@ -888,14 +894,18 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
888894

889895
let Constraints = "$Zdn = $_Zdn";
890896
let DestructiveInstType = Destructive;
891-
let ElementSize = ElementSizeNone;
892897
}
893898

894-
multiclass sve_int_perm_insrs<string asm> {
899+
multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
895900
def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>;
896901
def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>;
897902
def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>;
898903
def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>;
904+
905+
def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
906+
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
907+
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, i32, !cast<Instruction>(NAME # _S)>;
908+
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, i64, !cast<Instruction>(NAME # _D)>;
899909
}
900910

901911
class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -914,14 +924,17 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
914924

915925
let Constraints = "$Zdn = $_Zdn";
916926
let DestructiveInstType = Destructive;
917-
let ElementSize = ElementSizeNone;
918927
}
919928

920-
multiclass sve_int_perm_insrv<string asm> {
929+
multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
921930
def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>;
922931
def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>;
923932
def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>;
924933
def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>;
934+
935+
def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
936+
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
937+
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
925938
}
926939

927940
//===----------------------------------------------------------------------===//
@@ -3929,7 +3942,8 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
39293942
}
39303943
}
39313944

3932-
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
3945+
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm,
3946+
SDPatternOperator op = null_frag> {
39333947
def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
39343948
ElementSizeB>;
39353949
def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
@@ -3945,6 +3959,11 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
39453959
let Inst{22} = imm{5};
39463960
let Inst{9-8} = imm{4-3};
39473961
}
3962+
3963+
def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
3964+
def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
3965+
def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
3966+
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
39483967
}
39493968

39503969
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -3971,17 +3990,28 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
39713990
let ElementSize = zprty.ElementSize;
39723991
}
39733992

3974-
multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
3993+
multiclass sve_int_bin_pred_shift<bits<3> opc, string asm,
3994+
SDPatternOperator op> {
39753995
def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>;
39763996
def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>;
39773997
def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>;
39783998
def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>;
3999+
4000+
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
4001+
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
4002+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
4003+
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
39794004
}
39804005

3981-
multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm> {
4006+
multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
4007+
SDPatternOperator op> {
39824008
def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
39834009
def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>;
39844010
def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>;
4011+
4012+
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
4013+
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
4014+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
39854015
}
39864016

39874017
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)