Skip to content

Commit 4853bf0

Browse files
authored
[LoongArch] Lower build_vector to broadcast load if possible (#135896)
1 parent b6820c3 commit 4853bf0

File tree

7 files changed

+116
-56
lines changed

7 files changed

+116
-56
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,6 +1876,51 @@ static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
18761876
return false;
18771877
}
18781878

1879+
// Lower BUILD_VECTOR as broadcast load (if possible).
1880+
// For example:
1881+
// %a = load i8, ptr %ptr
1882+
// %b = build_vector %a, %a, %a, %a
1883+
// is lowered to :
1884+
// (VLDREPL_B $a0, 0)
1885+
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
1886+
const SDLoc &DL,
1887+
SelectionDAG &DAG) {
1888+
MVT VT = BVOp->getSimpleValueType(0);
1889+
int NumOps = BVOp->getNumOperands();
1890+
1891+
assert((VT.is128BitVector() || VT.is256BitVector()) &&
1892+
"Unsupported vector type for broadcast.");
1893+
1894+
SDValue IdentitySrc;
1895+
bool IsIdeneity = true;
1896+
1897+
for (int i = 0; i != NumOps; i++) {
1898+
SDValue Op = BVOp->getOperand(i);
1899+
if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
1900+
IsIdeneity = false;
1901+
break;
1902+
}
1903+
IdentitySrc = BVOp->getOperand(0);
1904+
}
1905+
1906+
// make sure that this load is valid and only has one user.
1907+
if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
1908+
return SDValue();
1909+
1910+
if (IsIdeneity) {
1911+
auto *LN = cast<LoadSDNode>(IdentitySrc);
1912+
SDVTList Tys =
1913+
LN->isIndexed()
1914+
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
1915+
: DAG.getVTList(VT, MVT::Other);
1916+
SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
1917+
SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
1918+
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
1919+
return BCast;
1920+
}
1921+
return SDValue();
1922+
}
1923+
18791924
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
18801925
SelectionDAG &DAG) const {
18811926
BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
@@ -1891,6 +1936,9 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
18911936
(!Subtarget.hasExtLASX() || !Is256Vec))
18921937
return SDValue();
18931938

1939+
if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
1940+
return Result;
1941+
18941942
if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
18951943
/*MinSplatBits=*/8) &&
18961944
SplatBitSize <= 64) {
@@ -5326,6 +5374,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
53265374
NODE_NAME_CASE(VSRLI)
53275375
NODE_NAME_CASE(VBSLL)
53285376
NODE_NAME_CASE(VBSRL)
5377+
NODE_NAME_CASE(VLDREPL)
53295378
}
53305379
#undef NODE_NAME_CASE
53315380
return nullptr;

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,10 @@ enum NodeType : unsigned {
155155

156156
// Vector byte logical left / right shift
157157
VBSLL,
158-
VBSRL
158+
VBSRL,
159+
160+
// Scalar load broadcast to vector
161+
VLDREPL
159162

160163
// Intrinsic operations end =============================================
161164
};

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,8 @@ def simm8_lsl # I : Operand<GRLenVT> {
307307
}
308308
}
309309

310-
def simm9_lsl3 : Operand<GRLenVT> {
310+
def simm9_lsl3 : Operand<GRLenVT>,
311+
ImmLeaf<GRLenVT, [{return isShiftedInt<9,3>(Imm);}]> {
311312
let ParserMatchClass = SImmAsmOperand<9, "lsl3">;
312313
let EncoderMethod = "getImmOpValueAsr<3>";
313314
let DecoderMethod = "decodeSImmOperand<9, 3>";
@@ -317,13 +318,15 @@ def simm10 : Operand<GRLenVT> {
317318
let ParserMatchClass = SImmAsmOperand<10>;
318319
}
319320

320-
def simm10_lsl2 : Operand<GRLenVT> {
321+
def simm10_lsl2 : Operand<GRLenVT>,
322+
ImmLeaf<GRLenVT, [{return isShiftedInt<10,2>(Imm);}]> {
321323
let ParserMatchClass = SImmAsmOperand<10, "lsl2">;
322324
let EncoderMethod = "getImmOpValueAsr<2>";
323325
let DecoderMethod = "decodeSImmOperand<10, 2>";
324326
}
325327

326-
def simm11_lsl1 : Operand<GRLenVT> {
328+
def simm11_lsl1 : Operand<GRLenVT>,
329+
ImmLeaf<GRLenVT, [{return isShiftedInt<11,1>(Imm);}]> {
327330
let ParserMatchClass = SImmAsmOperand<11, "lsl1">;
328331
let EncoderMethod = "getImmOpValueAsr<1>";
329332
let DecoderMethod = "decodeSImmOperand<11, 1>";

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2165,6 +2165,7 @@ def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
21652165
def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk),
21662166
(XVLDX GPR:$rj, GPR:$rk)>;
21672167

2168+
// xvldrepl
21682169
def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm),
21692170
(XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>;
21702171
def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm),
@@ -2174,6 +2175,13 @@ def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm),
21742175
def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm),
21752176
(XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>;
21762177

2178+
defm : VldreplPat<v32i8, XVLDREPL_B, simm12_addlike>;
2179+
defm : VldreplPat<v16i16, XVLDREPL_H, simm11_lsl1>;
2180+
defm : VldreplPat<v8i32, XVLDREPL_W, simm10_lsl2>;
2181+
defm : VldreplPat<v4i64, XVLDREPL_D, simm9_lsl3>;
2182+
defm : VldreplPat<v8f32, XVLDREPL_W, simm10_lsl2>;
2183+
defm : VldreplPat<v4f64, XVLDREPL_D, simm9_lsl3>;
2184+
21772185
// store
21782186
def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm),
21792187
(XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
2626
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
2727
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
2828
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
29+
def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>;
2930

3031
// Target nodes.
3132
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
@@ -64,6 +65,10 @@ def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>;
6465
def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>;
6566
def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>;
6667

68+
// DAG node for LoongArchISD::VLDREPL, typed by SDT_LoongArchVLDREPL (one
// vector result, one pointer operand). The property list marks it as having
// a chain, as possibly loading from memory, and as carrying a memory operand.
def loongarch_vldrepl
69+
: SDNode<"LoongArchISD::VLDREPL",
70+
SDT_LoongArchVLDREPL, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
71+
6772
def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
6873
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
6974
def immZExt3 : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>;
@@ -1433,6 +1438,14 @@ multiclass PatCCVrVrF<CondCode CC, string Inst> {
14331438
(!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
14341439
}
14351440

1441+
// Selection patterns that map a loongarch_vldrepl node with result type `vt`
// onto the replicate-load instruction `Inst`. Three address shapes are
// matched:
//   1. a bare BaseAddr, selected with an immediate offset of 0;
//   2. (AddrConstant GPR, ImmOpnd), selected as `Inst GPR, imm`;
//   3. (AddLike BaseAddr, ImmOpnd), selected as `Inst BaseAddr, imm`.
// `ImmOpnd` is the immediate operand class used for the offset in shapes 2
// and 3.
// NOTE(review): AddrConstant and AddLike are defined outside this hunk;
// presumably they match base-plus-constant-offset addresses - confirm.
multiclass VldreplPat<ValueType vt, LAInst Inst, Operand ImmOpnd> {
1442+
def : Pat<(vt(loongarch_vldrepl BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>;
1443+
def : Pat<(vt(loongarch_vldrepl(AddrConstant GPR:$rj, ImmOpnd:$imm))),
1444+
(Inst GPR:$rj, ImmOpnd:$imm)>;
1445+
def : Pat<(vt(loongarch_vldrepl(AddLike BaseAddr:$rj, ImmOpnd:$imm))),
1446+
(Inst BaseAddr:$rj, ImmOpnd:$imm)>;
1447+
}
1448+
14361449
let Predicates = [HasExtLSX] in {
14371450

14381451
// VADD_{B/H/W/D}
@@ -2342,6 +2355,7 @@ def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
23422355
def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk),
23432356
(VLDX GPR:$rj, GPR:$rk)>;
23442357

2358+
// vldrepl
23452359
def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm),
23462360
(VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>;
23472361
def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm),
@@ -2351,6 +2365,13 @@ def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm),
23512365
def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm),
23522366
(VLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>;
23532367

2368+
defm : VldreplPat<v16i8, VLDREPL_B, simm12_addlike>;
2369+
defm : VldreplPat<v8i16, VLDREPL_H, simm11_lsl1>;
2370+
defm : VldreplPat<v4i32, VLDREPL_W, simm10_lsl2>;
2371+
defm : VldreplPat<v2i64, VLDREPL_D, simm9_lsl3>;
2372+
defm : VldreplPat<v4f32, VLDREPL_W, simm10_lsl2>;
2373+
defm : VldreplPat<v2f64, VLDREPL_D, simm9_lsl3>;
2374+
23542375
// store
23552376
def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm),
23562377
(VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>;

llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
2121
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
2222
; CHECK-LABEL: xvldrepl_d_unaligned_offset:
2323
; CHECK: # %bb.0:
24-
; CHECK-NEXT: ld.d $a0, $a0, 4
25-
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
24+
; CHECK-NEXT: addi.d $a0, $a0, 4
25+
; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0
2626
; CHECK-NEXT: ret
2727
%p = getelementptr i32, ptr %ptr, i32 1
2828
%tmp = load i64, ptr %p
@@ -34,8 +34,7 @@ define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
3434
define <32 x i8> @xvldrepl_b(ptr %ptr) {
3535
; CHECK-LABEL: xvldrepl_b:
3636
; CHECK: # %bb.0:
37-
; CHECK-NEXT: ld.b $a0, $a0, 0
38-
; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
37+
; CHECK-NEXT: xvldrepl.b $xr0, $a0, 0
3938
; CHECK-NEXT: ret
4039
%tmp = load i8, ptr %ptr
4140
%tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
@@ -46,8 +45,7 @@ define <32 x i8> @xvldrepl_b(ptr %ptr) {
4645
define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
4746
; CHECK-LABEL: xvldrepl_b_offset:
4847
; CHECK: # %bb.0:
49-
; CHECK-NEXT: ld.b $a0, $a0, 33
50-
; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
48+
; CHECK-NEXT: xvldrepl.b $xr0, $a0, 33
5149
; CHECK-NEXT: ret
5250
%p = getelementptr i8, ptr %ptr, i64 33
5351
%tmp = load i8, ptr %p
@@ -60,8 +58,7 @@ define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
6058
define <16 x i16> @xvldrepl_h(ptr %ptr) {
6159
; CHECK-LABEL: xvldrepl_h:
6260
; CHECK: # %bb.0:
63-
; CHECK-NEXT: ld.h $a0, $a0, 0
64-
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
61+
; CHECK-NEXT: xvldrepl.h $xr0, $a0, 0
6562
; CHECK-NEXT: ret
6663
%tmp = load i16, ptr %ptr
6764
%tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
@@ -72,8 +69,7 @@ define <16 x i16> @xvldrepl_h(ptr %ptr) {
7269
define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
7370
; CHECK-LABEL: xvldrepl_h_offset:
7471
; CHECK: # %bb.0:
75-
; CHECK-NEXT: ld.h $a0, $a0, 66
76-
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
72+
; CHECK-NEXT: xvldrepl.h $xr0, $a0, 66
7773
; CHECK-NEXT: ret
7874
%p = getelementptr i16, ptr %ptr, i64 33
7975
%tmp = load i16, ptr %p
@@ -85,8 +81,7 @@ define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
8581
define <8 x i32> @xvldrepl_w(ptr %ptr) {
8682
; CHECK-LABEL: xvldrepl_w:
8783
; CHECK: # %bb.0:
88-
; CHECK-NEXT: ld.w $a0, $a0, 0
89-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
84+
; CHECK-NEXT: xvldrepl.w $xr0, $a0, 0
9085
; CHECK-NEXT: ret
9186
%tmp = load i32, ptr %ptr
9287
%tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -97,8 +92,7 @@ define <8 x i32> @xvldrepl_w(ptr %ptr) {
9792
define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
9893
; CHECK-LABEL: xvldrepl_w_offset:
9994
; CHECK: # %bb.0:
100-
; CHECK-NEXT: ld.w $a0, $a0, 132
101-
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
95+
; CHECK-NEXT: xvldrepl.w $xr0, $a0, 132
10296
; CHECK-NEXT: ret
10397
%p = getelementptr i32, ptr %ptr, i64 33
10498
%tmp = load i32, ptr %p
@@ -111,8 +105,7 @@ define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
111105
define <4 x i64> @xvldrepl_d(ptr %ptr) {
112106
; CHECK-LABEL: xvldrepl_d:
113107
; CHECK: # %bb.0:
114-
; CHECK-NEXT: ld.d $a0, $a0, 0
115-
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
108+
; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0
116109
; CHECK-NEXT: ret
117110
%tmp = load i64, ptr %ptr
118111
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
@@ -123,8 +116,7 @@ define <4 x i64> @xvldrepl_d(ptr %ptr) {
123116
define <4 x i64> @xvldrepl_d_offset(ptr %ptr) {
124117
; CHECK-LABEL: xvldrepl_d_offset:
125118
; CHECK: # %bb.0:
126-
; CHECK-NEXT: ld.d $a0, $a0, 264
127-
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
119+
; CHECK-NEXT: xvldrepl.d $xr0, $a0, 264
128120
; CHECK-NEXT: ret
129121
%p = getelementptr i64, ptr %ptr, i64 33
130122
%tmp = load i64, ptr %p
@@ -136,8 +128,7 @@ define <4 x i64> @xvldrepl_d_offset(ptr %ptr) {
136128
define <8 x float> @vldrepl_w_flt(ptr %ptr) {
137129
; CHECK-LABEL: vldrepl_w_flt:
138130
; CHECK: # %bb.0:
139-
; CHECK-NEXT: fld.s $fa0, $a0, 0
140-
; CHECK-NEXT: xvreplve0.w $xr0, $xr0
131+
; CHECK-NEXT: xvldrepl.w $xr0, $a0, 0
141132
; CHECK-NEXT: ret
142133
%tmp = load float, ptr %ptr
143134
%tmp1 = insertelement <8 x float> zeroinitializer, float %tmp, i32 0
@@ -148,8 +139,7 @@ define <8 x float> @vldrepl_w_flt(ptr %ptr) {
148139
define <8 x float> @vldrepl_w_flt_offset(ptr %ptr) {
149140
; CHECK-LABEL: vldrepl_w_flt_offset:
150141
; CHECK: # %bb.0:
151-
; CHECK-NEXT: fld.s $fa0, $a0, 264
152-
; CHECK-NEXT: xvreplve0.w $xr0, $xr0
142+
; CHECK-NEXT: xvldrepl.w $xr0, $a0, 264
153143
; CHECK-NEXT: ret
154144
%p = getelementptr i64, ptr %ptr, i64 33
155145
%tmp = load float, ptr %p
@@ -161,8 +151,7 @@ define <8 x float> @vldrepl_w_flt_offset(ptr %ptr) {
161151
define <4 x double> @vldrepl_d_dbl(ptr %ptr) {
162152
; CHECK-LABEL: vldrepl_d_dbl:
163153
; CHECK: # %bb.0:
164-
; CHECK-NEXT: fld.d $fa0, $a0, 0
165-
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
154+
; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0
166155
; CHECK-NEXT: ret
167156
%tmp = load double, ptr %ptr
168157
%tmp1 = insertelement <4 x double> zeroinitializer, double %tmp, i32 0
@@ -173,8 +162,7 @@ define <4 x double> @vldrepl_d_dbl(ptr %ptr) {
173162
define <4 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
174163
; CHECK-LABEL: vldrepl_d_dbl_offset:
175164
; CHECK: # %bb.0:
176-
; CHECK-NEXT: fld.d $fa0, $a0, 264
177-
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
165+
; CHECK-NEXT: xvldrepl.d $xr0, $a0, 264
178166
; CHECK-NEXT: ret
179167
%p = getelementptr i64, ptr %ptr, i64 33
180168
%tmp = load double, ptr %p

0 commit comments

Comments
 (0)