Skip to content

Commit d056d5d

Browse files
committed
[RISCV] Use custom isel for vector indexed load/store intrinsics.
There are many legal combinations of index and data VTs supported for these intrinsics. This results in a lot of isel patterns in RISCVGenDAGISel.inc. By adding a separate table similar to what we use for segment load/stores, we can more efficiently manually select these intrinsics. We should also be able to reuse this table for scalable vector gather/scatter. This reduces the llc binary size by ~56K. Reviewed By: khchen Differential Revision: https://reviews.llvm.org/D97033
1 parent dbf910f commit d056d5d

File tree

3 files changed

+162
-57
lines changed

3 files changed

+162
-57
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ namespace RISCV {
3131
#define GET_RISCVVLSEGTable_IMPL
3232
#define GET_RISCVVLXSEGTable_IMPL
3333
#define GET_RISCVVSXSEGTable_IMPL
34+
#define GET_RISCVVLXTable_IMPL
35+
#define GET_RISCVVSXTable_IMPL
3436
#include "RISCVGenSearchableTables.inc"
3537
} // namespace RISCV
3638
} // namespace llvm
@@ -666,6 +668,50 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
666668
selectVLSEGFF(Node, /*IsMasked*/ true);
667669
return;
668670
}
671+
case Intrinsic::riscv_vloxei:
672+
case Intrinsic::riscv_vloxei_mask:
673+
case Intrinsic::riscv_vluxei:
674+
case Intrinsic::riscv_vluxei_mask: {
675+
bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
676+
IntNo == Intrinsic::riscv_vluxei_mask;
677+
bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
678+
IntNo == Intrinsic::riscv_vloxei_mask;
679+
680+
SDLoc DL(Node);
681+
MVT VT = Node->getSimpleValueType(0);
682+
unsigned ScalarSize = VT.getScalarSizeInBits();
683+
MVT XLenVT = Subtarget->getXLenVT();
684+
SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
685+
686+
unsigned CurOp = 2;
687+
SmallVector<SDValue, 7> Operands;
688+
if (IsMasked)
689+
Operands.push_back(Node->getOperand(CurOp++));
690+
Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
691+
Operands.push_back(Node->getOperand(CurOp++)); // Index.
692+
MVT IndexVT = Operands.back()->getSimpleValueType(0);
693+
if (IsMasked)
694+
Operands.push_back(Node->getOperand(CurOp++)); // Mask.
695+
SDValue VL;
696+
selectVLOp(Node->getOperand(CurOp++), VL);
697+
Operands.push_back(VL);
698+
Operands.push_back(SEW);
699+
Operands.push_back(Node->getOperand(0)); // Chain.
700+
701+
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
702+
"Element count mismatch");
703+
704+
RISCVVLMUL LMUL = getLMUL(VT);
705+
RISCVVLMUL IndexLMUL = getLMUL(IndexVT);
706+
unsigned IndexScalarSize = IndexVT.getScalarSizeInBits();
707+
const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
708+
IsMasked, IsOrdered, IndexScalarSize, static_cast<unsigned>(LMUL),
709+
static_cast<unsigned>(IndexLMUL));
710+
SDNode *Load =
711+
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
712+
ReplaceNode(Node, Load);
713+
return;
714+
}
669715
}
670716
break;
671717
}
@@ -748,6 +794,49 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
748794
case Intrinsic::riscv_vsuxseg8_mask:
749795
selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
750796
return;
797+
case Intrinsic::riscv_vsoxei:
798+
case Intrinsic::riscv_vsoxei_mask:
799+
case Intrinsic::riscv_vsuxei:
800+
case Intrinsic::riscv_vsuxei_mask: {
801+
bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
802+
IntNo == Intrinsic::riscv_vsuxei_mask;
803+
bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
804+
IntNo == Intrinsic::riscv_vsoxei_mask;
805+
806+
SDLoc DL(Node);
807+
MVT VT = Node->getOperand(2)->getSimpleValueType(0);
808+
unsigned ScalarSize = VT.getScalarSizeInBits();
809+
MVT XLenVT = Subtarget->getXLenVT();
810+
SDValue SEW = CurDAG->getTargetConstant(ScalarSize, DL, XLenVT);
811+
812+
unsigned CurOp = 2;
813+
SmallVector<SDValue, 6> Operands;
814+
Operands.push_back(Node->getOperand(CurOp++)); // Store value.
815+
Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
816+
Operands.push_back(Node->getOperand(CurOp++)); // Index.
817+
MVT IndexVT = Operands.back()->getSimpleValueType(0);
818+
if (IsMasked)
819+
Operands.push_back(Node->getOperand(CurOp++)); // Mask.
820+
SDValue VL;
821+
selectVLOp(Node->getOperand(CurOp++), VL);
822+
Operands.push_back(VL);
823+
Operands.push_back(SEW);
824+
Operands.push_back(Node->getOperand(0)); // Chain.
825+
826+
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
827+
"Element count mismatch");
828+
829+
RISCVVLMUL LMUL = getLMUL(VT);
830+
RISCVVLMUL IndexLMUL = getLMUL(IndexVT);
831+
unsigned IndexScalarSize = IndexVT.getScalarSizeInBits();
832+
const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
833+
IsMasked, IsOrdered, IndexScalarSize, static_cast<unsigned>(LMUL),
834+
static_cast<unsigned>(IndexLMUL));
835+
SDNode *Store =
836+
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
837+
ReplaceNode(Node, Store);
838+
return;
839+
}
751840
}
752841
break;
753842
}

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,21 @@ struct VSXSEGPseudo {
126126
uint16_t Pseudo;
127127
};
128128

129+
// One row of the indexed load / indexed store pseudo lookup tables. Both
// RISCVVLXTable and RISCVVSXTable in RISCVInstrInfoVPseudos.td name this
// struct as their CppTypeName, so loads and stores share the same layout.
// The first five fields form the primary key used by getVLXPseudo /
// getVSXPseudo; Pseudo is the value that is looked up.
struct VLX_VSXPseudo {
130+
// 1 for the "_MASK" pseudo variants, 0 for the unmasked ones.
uint8_t Masked;
131+
// 1 for ordered (VLOX / VSOX) pseudos, 0 for unordered (VLUX / VSUX) ones.
uint8_t Ordered;
132+
// NOTE: despite the name, the .td classes fill this with the index EEW, and
// the lookups in RISCVISelDAGToDAG.cpp pass the index scalar size in bits.
uint8_t SEW;
133+
// LMUL of the data register group (the lookups pass getLMUL of the data VT).
uint8_t LMUL;
134+
// LMUL of the index register group (the lookups pass getLMUL of the index VT).
uint8_t IndexLMUL;
135+
// The pseudo instruction for this key; handed to getMachineNode by the
// custom selection code.
uint16_t Pseudo;
136+
};
137+
129138
#define GET_RISCVVSSEGTable_DECL
130139
#define GET_RISCVVLSEGTable_DECL
131140
#define GET_RISCVVLXSEGTable_DECL
132141
#define GET_RISCVVSXSEGTable_DECL
142+
#define GET_RISCVVLXTable_DECL
143+
#define GET_RISCVVSXTable_DECL
133144
#include "RISCVGenSearchableTables.inc"
134145
} // namespace RISCV
135146

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 62 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,40 @@ def RISCVVIntrinsicsTable : GenericTable {
413413
let PrimaryKeyName = "getRISCVVIntrinsicInfo";
414414
}
415415

416+
// Mixin that tags an indexed-load pseudo with the key fields collected by
// RISCVVLXTable (which uses this class as its FilterClass).
//   M  - 1 for the masked variant, 0 otherwise.
//   O  - 1 for ordered loads, 0 for unordered loads.
//   S  - stored as "SEW"; VPseudoILoadNoMask / VPseudoILoadMask pass the
//        index EEW here.
//   L  - LMUL of the data register group (those classes pass VLMul).
//   IL - LMUL of the index register group.
class RISCVVLX<bit M, bit O, bits<7> S, bits<3> L, bits<3> IL> {
417+
bits<1> Masked = M;
418+
bits<1> Ordered = O;
419+
bits<7> SEW = S;
420+
bits<3> LMUL = L;
421+
bits<3> IndexLMUL = IL;
422+
// NAME is cast back to a Pseudo record so the table row can carry the
// pseudo that inherits this class.
Pseudo Pseudo = !cast<Pseudo>(NAME);
423+
}
424+
425+
// Searchable table built from every record that derives from RISCVVLX.
// Rows are emitted with the C++ type VLX_VSXPseudo, and the generated lookup
// function getVLXPseudo is keyed on
// (Masked, Ordered, SEW, LMUL, IndexLMUL).
def RISCVVLXTable : GenericTable {
426+
let FilterClass = "RISCVVLX";
427+
let CppTypeName = "VLX_VSXPseudo";
428+
let Fields = ["Masked", "Ordered", "SEW", "LMUL", "IndexLMUL", "Pseudo"];
429+
let PrimaryKey = ["Masked", "Ordered", "SEW", "LMUL", "IndexLMUL"];
430+
let PrimaryKeyName = "getVLXPseudo";
431+
}
432+
433+
// Mixin that tags an indexed-store pseudo with the key fields collected by
// RISCVVSXTable (which uses this class as its FilterClass).
//   M  - 1 for the masked variant, 0 otherwise.
//   O  - 1 for ordered stores, 0 for unordered stores.
//   S  - stored as "SEW"; VPseudoIStoreNoMask / VPseudoIStoreMask pass the
//        index EEW here.
//   L  - LMUL of the data register group (those classes pass VLMul).
//   IL - LMUL of the index register group.
class RISCVVSX<bit M, bit O, bits<7> S, bits<3> L, bits<3> IL> {
434+
bits<1> Masked = M;
435+
bits<1> Ordered = O;
436+
bits<7> SEW = S;
437+
bits<3> LMUL = L;
438+
bits<3> IndexLMUL = IL;
439+
// NAME is cast back to a Pseudo record so the table row can carry the
// pseudo that inherits this class.
Pseudo Pseudo = !cast<Pseudo>(NAME);
440+
}
441+
442+
// Searchable table built from every record that derives from RISCVVSX.
// It shares the C++ row type VLX_VSXPseudo with the indexed-load table, and
// the generated lookup function getVSXPseudo is keyed on
// (Masked, Ordered, SEW, LMUL, IndexLMUL).
def RISCVVSXTable : GenericTable {
443+
let FilterClass = "RISCVVSX";
444+
let CppTypeName = "VLX_VSXPseudo";
445+
let Fields = ["Masked", "Ordered", "SEW", "LMUL", "IndexLMUL", "Pseudo"];
446+
let PrimaryKey = ["Masked", "Ordered", "SEW", "LMUL", "IndexLMUL"];
447+
let PrimaryKeyName = "getVSXPseudo";
448+
}
449+
416450
class RISCVVLSEG<bits<4> N, bit M, bit Str, bit F, bits<7> S, bits<3> L> {
417451
bits<4> NF = N;
418452
bits<1> Masked = M;
@@ -616,10 +650,12 @@ class VPseudoSLoadMask<VReg RetClass>:
616650
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
617651
}
618652

619-
class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass>:
653+
class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
654+
bit Ordered>:
620655
Pseudo<(outs RetClass:$rd),
621656
(ins GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
622-
RISCVVPseudo {
657+
RISCVVPseudo,
658+
RISCVVLX</*Masked*/0, Ordered, EEW, VLMul, LMUL> {
623659
let mayLoad = 1;
624660
let mayStore = 0;
625661
let hasSideEffects = 0;
@@ -631,12 +667,14 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass>:
631667
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
632668
}
633669

634-
class VPseudoILoadMask<VReg RetClass, VReg IdxClass>:
670+
class VPseudoILoadMask<VReg RetClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
671+
bit Ordered>:
635672
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
636673
(ins GetVRegNoV0<RetClass>.R:$merge,
637674
GPR:$rs1, IdxClass:$rs2,
638675
VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
639-
RISCVVPseudo {
676+
RISCVVPseudo,
677+
RISCVVLX</*Masked*/1, Ordered, EEW, VLMul, LMUL> {
640678
let mayLoad = 1;
641679
let mayStore = 0;
642680
let hasSideEffects = 0;
@@ -877,10 +915,12 @@ class VPseudoBinaryNoMask<VReg RetClass,
877915
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
878916
}
879917

880-
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass>:
918+
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
919+
bit Ordered>:
881920
Pseudo<(outs),
882921
(ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, GPR:$vl, ixlenimm:$sew),[]>,
883-
RISCVVPseudo {
922+
RISCVVPseudo,
923+
RISCVVSX</*Masked*/0, Ordered, EEW, VLMul, LMUL> {
884924
let mayLoad = 0;
885925
let mayStore = 1;
886926
let hasSideEffects = 0;
@@ -892,10 +932,12 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass>:
892932
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
893933
}
894934

895-
class VPseudoIStoreMask<VReg StClass, VReg IdxClass>:
935+
class VPseudoIStoreMask<VReg StClass, VReg IdxClass, bits<7> EEW, bits<3> LMUL,
936+
bit Ordered>:
896937
Pseudo<(outs),
897938
(ins StClass:$rd, GPR:$rs1, IdxClass:$rs2, VMaskOp:$vm, GPR:$vl, ixlenimm:$sew),[]>,
898-
RISCVVPseudo {
939+
RISCVVPseudo,
940+
RISCVVSX</*Masked*/1, Ordered, EEW, VLMul, LMUL> {
899941
let mayLoad = 0;
900942
let mayStore = 1;
901943
let hasSideEffects = 0;
@@ -1284,7 +1326,7 @@ multiclass VPseudoSLoad {
12841326
}
12851327
}
12861328

1287-
multiclass VPseudoILoad {
1329+
multiclass VPseudoILoad<bit Ordered> {
12881330
foreach eew = EEWList in {
12891331
foreach sew = EEWList in {
12901332
foreach lmul = MxSet<sew>.m in {
@@ -1298,8 +1340,10 @@ multiclass VPseudoILoad {
12981340
defvar Vreg = lmul.vrclass;
12991341
defvar IdxVreg = idx_lmul.vrclass;
13001342
let VLMul = lmul.value in {
1301-
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo : VPseudoILoadNoMask<Vreg, IdxVreg>;
1302-
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" : VPseudoILoadMask<Vreg, IdxVreg>;
1343+
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
1344+
VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>;
1345+
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
1346+
VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>;
13031347
}
13041348
}
13051349
}
@@ -1341,7 +1385,7 @@ multiclass VPseudoSStore {
13411385
}
13421386
}
13431387

1344-
multiclass VPseudoIStore {
1388+
multiclass VPseudoIStore<bit Ordered> {
13451389
foreach eew = EEWList in {
13461390
foreach sew = EEWList in {
13471391
foreach lmul = MxSet<sew>.m in {
@@ -1356,9 +1400,9 @@ multiclass VPseudoIStore {
13561400
defvar IdxVreg = idx_lmul.vrclass;
13571401
let VLMul = lmul.value in {
13581402
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
1359-
VPseudoIStoreNoMask<Vreg, IdxVreg>;
1403+
VPseudoIStoreNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>;
13601404
def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
1361-
VPseudoIStoreMask<Vreg, IdxVreg>;
1405+
VPseudoIStoreMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>;
13621406
}
13631407
}
13641408
}
@@ -3263,10 +3307,10 @@ defm PseudoVSS : VPseudoSStore;
32633307
//===----------------------------------------------------------------------===//
32643308

32653309
// Vector Indexed Loads and Stores
3266-
defm PseudoVLUX : VPseudoILoad;
3267-
defm PseudoVLOX : VPseudoILoad;
3268-
defm PseudoVSOX : VPseudoIStore;
3269-
defm PseudoVSUX : VPseudoIStore;
3310+
defm PseudoVLUX : VPseudoILoad</*Ordered=*/false>;
3311+
defm PseudoVLOX : VPseudoILoad</*Ordered=*/true>;
3312+
defm PseudoVSOX : VPseudoIStore</*Ordered=*/true>;
3313+
defm PseudoVSUX : VPseudoIStore</*Ordered=*/false>;
32703314

32713315
//===----------------------------------------------------------------------===//
32723316
// 7.7. Unit-stride Fault-Only-First Loads
@@ -3844,45 +3888,6 @@ foreach vti = AllVectors in
38443888
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
38453889
}
38463890

3847-
//===----------------------------------------------------------------------===//
3848-
// 7.6 Vector Indexed Instructions
3849-
//===----------------------------------------------------------------------===//
3850-
3851-
foreach vti = AllVectors in
3852-
foreach eew = EEWList in {
3853-
defvar vlmul = vti.LMul;
3854-
defvar octuple_lmul = octuple_from_str<vti.LMul.MX>.ret;
3855-
defvar log_sew = shift_amount<vti.SEW>.val;
3856-
// The data vector register group has EEW=SEW, EMUL=LMUL, while the offset
3857-
// vector register group has EEW encoding in the instruction and EMUL=(EEW/SEW)*LMUL.
3858-
// calculate octuple elmul which is (eew * octuple_lmul) >> log_sew
3859-
defvar octuple_elmul = !srl(!mul(eew, octuple_lmul), log_sew);
3860-
// legal octuple elmul should be more than 0 and less than equal 64
3861-
if !gt(octuple_elmul, 0) then {
3862-
if !le(octuple_elmul, 64) then {
3863-
defvar elmul_str = octuple_to_str<octuple_elmul>.ret;
3864-
defvar elmul =!cast<LMULInfo>("V_" # elmul_str);
3865-
defvar idx_vti = !cast<VTypeInfo>("VI" # eew # elmul_str);
3866-
3867-
defm : VPatILoad<"int_riscv_vluxei",
3868-
"PseudoVLUXEI"#eew,
3869-
vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW,
3870-
vlmul, elmul, vti.RegClass, idx_vti.RegClass>;
3871-
defm : VPatILoad<"int_riscv_vloxei",
3872-
"PseudoVLOXEI"#eew,
3873-
vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW,
3874-
vlmul, elmul, vti.RegClass, idx_vti.RegClass>;
3875-
defm : VPatIStore<"int_riscv_vsoxei",
3876-
"PseudoVSOXEI"#eew,
3877-
vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW,
3878-
vlmul, elmul, vti.RegClass, idx_vti.RegClass>;
3879-
defm : VPatIStore<"int_riscv_vsuxei",
3880-
"PseudoVSUXEI"#eew,
3881-
vti.Vector, idx_vti.Vector, vti.Mask, vti.SEW,
3882-
vlmul, elmul, vti.RegClass, idx_vti.RegClass>;
3883-
}
3884-
}
3885-
}
38863891
} // Predicates = [HasStdExtV]
38873892

38883893
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)