Skip to content

Commit 47c88bc

Browse files
committed
[LoongArch] Fix LASX vector_extract codegen
Custom-lower `ISD::EXTRACT_VECTOR_ELT` for LASX.
1 parent d3ddb93 commit 47c88bc

File tree

4 files changed

+113
-57
lines changed

4 files changed

+113
-57
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
286286
setOperationAction(ISD::UNDEF, VT, Legal);
287287

288288
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
289-
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
289+
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
290290
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
291291

292292
setOperationAction(ISD::SETCC, VT, Legal);
@@ -406,6 +406,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
406406
return lowerWRITE_REGISTER(Op, DAG);
407407
case ISD::INSERT_VECTOR_ELT:
408408
return lowerINSERT_VECTOR_ELT(Op, DAG);
409+
case ISD::EXTRACT_VECTOR_ELT:
410+
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
409411
case ISD::BUILD_VECTOR:
410412
return lowerBUILD_VECTOR(Op, DAG);
411413
case ISD::VECTOR_SHUFFLE:
@@ -513,6 +515,23 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
513515
return SDValue();
514516
}
515517

518+
// Custom lowering hook for ISD::EXTRACT_VECTOR_ELT.
//
// Op is the EXTRACT_VECTOR_ELT node: operand 0 is the source vector and
// operand 1 is the element index. DAG is not used by this function.
//
// Returns Op unchanged when the index is a compile-time constant and either
//   * the element type is i32, i64, f32 or f64 (any constant index), or
//   * the index is smaller than NumElts / 2, i.e. it addresses the lower half
//     of the vector.
// Every other case (a non-constant index, or a constant index in the upper
// half of a vector with any other element type) returns an empty SDValue.
// NOTE(review): presumably returning Op keeps the node as-is for instruction
// selection, while an empty SDValue requests the generic expansion -- confirm
// against the TargetLowering::LowerOperation contract.
SDValue
519+
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
520+
SelectionDAG &DAG) const {
521+
EVT VecTy = Op->getOperand(0)->getValueType(0);
522+
SDValue Idx = Op->getOperand(1);
523+
EVT EltTy = VecTy.getVectorElementType();
524+
unsigned NumElts = VecTy.getVectorNumElements();
525+
526+
// The cast<> at the end of the condition is only evaluated after isa<> has
// succeeded (&& short-circuits) and after all four element-type comparisons
// have failed, so the half-width limit applies to the remaining element
// types only.
if (isa<ConstantSDNode>(Idx) &&
527+
(EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
528+
EltTy == MVT::f64 ||
529+
cast<ConstantSDNode>(Idx)->getZExtValue() < NumElts / 2))
530+
return Op;
531+
532+
// Not one of the accepted forms: hand back an empty value.
return SDValue();
533+
}
534+
516535
SDValue
517536
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
518537
SelectionDAG &DAG) const {

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ class LoongArchTargetLowering : public TargetLowering {
279279
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
280280
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
281281
SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
282+
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
282283
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
283284
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
284285
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,38 +1590,14 @@ def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
15901590
(VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
15911591
def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
15921592
(VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
1593-
def : Pat<(i64 (vector_extract v8i32:$xj, uimm2:$imm)),
1594-
(VPICKVE2GR_W (EXTRACT_SUBREG v8i32:$xj, sub_128), uimm2:$imm)>;
1595-
def : Pat<(i64 (vector_extract v4i64:$xj, uimm1:$imm)),
1596-
(VPICKVE2GR_D (EXTRACT_SUBREG v4i64:$xj, sub_128), uimm1:$imm)>;
1597-
def : Pat<(f32 (vector_extract v8f32:$xj, uimm2:$imm)),
1598-
(f32 (EXTRACT_SUBREG (XVREPL128VEI_W v8f32:$xj, uimm2:$imm), sub_32))>;
1599-
def : Pat<(f64 (vector_extract v4f64:$xj, uimm1:$imm)),
1600-
(f64 (EXTRACT_SUBREG (XVREPL128VEI_D v4f64:$xj, uimm1:$imm), sub_64))>;
1601-
1602-
// Vector extraction with variable index.
1603-
def : Pat<(i64 (vector_extract v32i8:$xj, i64:$rk)),
1604-
(SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_B v32i8:$xj,
1605-
i64:$rk),
1606-
sub_32)),
1607-
GPR), (i64 24))>;
1608-
def : Pat<(i64 (vector_extract v16i16:$xj, i64:$rk)),
1609-
(SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_H v16i16:$xj,
1610-
i64:$rk),
1611-
sub_32)),
1612-
GPR), (i64 16))>;
1613-
def : Pat<(i64 (vector_extract v8i32:$xj, i64:$rk)),
1614-
(COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (XVREPLVE_W v8i32:$xj, i64:$rk),
1615-
sub_32)),
1616-
GPR)>;
1617-
def : Pat<(i64 (vector_extract v4i64:$xj, i64:$rk)),
1618-
(COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (XVREPLVE_D v4i64:$xj, i64:$rk),
1619-
sub_64)),
1620-
GPR)>;
1621-
def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)),
1622-
(f32 (EXTRACT_SUBREG (XVREPLVE_W v8f32:$xj, i64:$rk), sub_32))>;
1623-
def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)),
1624-
(f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>;
1593+
// Constant-index element extraction for the 32- and 64-bit element types.
// The integer patterns select XVPICKVE2GR_W/D directly on the $xj operand
// (no sub_128 subregister extraction); the index operand is uimm3 for the
// 8-element types and uimm2 for the 4-element types.
def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
1594+
(XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
1595+
def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
1596+
(XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
1597+
// The floating-point patterns reuse XVPICKVE2GR_W/D and wrap the result in
// MOVGR2FR_W/D.
def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)),
1598+
(MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>;
1599+
def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)),
1600+
(MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>;
16251601

16261602
// vselect
16271603
def : Pat<(v32i8 (vselect LASX256:$xj, LASX256:$xd,

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll

Lines changed: 84 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
3131
; CHECK-LABEL: extract_8xi32:
3232
; CHECK: # %bb.0:
3333
; CHECK-NEXT: xvld $xr0, $a0, 0
34-
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
34+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
3535
; CHECK-NEXT: st.w $a0, $a1, 0
3636
; CHECK-NEXT: ret
3737
%v = load volatile <8 x i32>, ptr %src
@@ -44,7 +44,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
4444
; CHECK-LABEL: extract_4xi64:
4545
; CHECK: # %bb.0:
4646
; CHECK-NEXT: xvld $xr0, $a0, 0
47-
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
47+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
4848
; CHECK-NEXT: st.d $a0, $a1, 0
4949
; CHECK-NEXT: ret
5050
%v = load volatile <4 x i64>, ptr %src
@@ -57,8 +57,8 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind {
5757
; CHECK-LABEL: extract_8xfloat:
5858
; CHECK: # %bb.0:
5959
; CHECK-NEXT: xvld $xr0, $a0, 0
60-
; CHECK-NEXT: ori $a0, $zero, 7
61-
; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a0
60+
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
61+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
6262
; CHECK-NEXT: fst.s $fa0, $a1, 0
6363
; CHECK-NEXT: ret
6464
%v = load volatile <8 x float>, ptr %src
@@ -71,8 +71,8 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
7171
; CHECK-LABEL: extract_4xdouble:
7272
; CHECK: # %bb.0:
7373
; CHECK-NEXT: xvld $xr0, $a0, 0
74-
; CHECK-NEXT: ori $a0, $zero, 3
75-
; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a0
74+
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
75+
; CHECK-NEXT: movgr2fr.d $fa0, $a0
7676
; CHECK-NEXT: fst.d $fa0, $a1, 0
7777
; CHECK-NEXT: ret
7878
%v = load volatile <4 x double>, ptr %src
@@ -84,12 +84,21 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
8484
define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
8585
; CHECK-LABEL: extract_32xi8_idx:
8686
; CHECK: # %bb.0:
87-
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
87+
; CHECK-NEXT: addi.d $sp, $sp, -64
88+
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
89+
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
90+
; CHECK-NEXT: addi.d $fp, $sp, 64
91+
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
8892
; CHECK-NEXT: xvld $xr0, $a0, 0
89-
; CHECK-NEXT: xvreplve.b $xr0, $xr0, $a2
90-
; CHECK-NEXT: movfr2gr.s $a0, $fa0
91-
; CHECK-NEXT: srai.w $a0, $a0, 24
93+
; CHECK-NEXT: xvst $xr0, $sp, 0
94+
; CHECK-NEXT: addi.d $a0, $sp, 0
95+
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0
96+
; CHECK-NEXT: ld.b $a0, $a0, 0
9297
; CHECK-NEXT: st.b $a0, $a1, 0
98+
; CHECK-NEXT: addi.d $sp, $fp, -64
99+
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
100+
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
101+
; CHECK-NEXT: addi.d $sp, $sp, 64
93102
; CHECK-NEXT: ret
94103
%v = load volatile <32 x i8>, ptr %src
95104
%e = extractelement <32 x i8> %v, i32 %idx
@@ -100,12 +109,21 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
100109
define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
101110
; CHECK-LABEL: extract_16xi16_idx:
102111
; CHECK: # %bb.0:
103-
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
112+
; CHECK-NEXT: addi.d $sp, $sp, -64
113+
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
114+
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
115+
; CHECK-NEXT: addi.d $fp, $sp, 64
116+
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
104117
; CHECK-NEXT: xvld $xr0, $a0, 0
105-
; CHECK-NEXT: xvreplve.h $xr0, $xr0, $a2
106-
; CHECK-NEXT: movfr2gr.s $a0, $fa0
107-
; CHECK-NEXT: srai.w $a0, $a0, 16
118+
; CHECK-NEXT: xvst $xr0, $sp, 0
119+
; CHECK-NEXT: addi.d $a0, $sp, 0
120+
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1
121+
; CHECK-NEXT: ld.h $a0, $a0, 0
108122
; CHECK-NEXT: st.h $a0, $a1, 0
123+
; CHECK-NEXT: addi.d $sp, $fp, -64
124+
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
125+
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
126+
; CHECK-NEXT: addi.d $sp, $sp, 64
109127
; CHECK-NEXT: ret
110128
%v = load volatile <16 x i16>, ptr %src
111129
%e = extractelement <16 x i16> %v, i32 %idx
@@ -116,11 +134,21 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
116134
define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
117135
; CHECK-LABEL: extract_8xi32_idx:
118136
; CHECK: # %bb.0:
119-
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
137+
; CHECK-NEXT: addi.d $sp, $sp, -64
138+
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
139+
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
140+
; CHECK-NEXT: addi.d $fp, $sp, 64
141+
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
120142
; CHECK-NEXT: xvld $xr0, $a0, 0
121-
; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2
122-
; CHECK-NEXT: movfr2gr.s $a0, $fa0
143+
; CHECK-NEXT: xvst $xr0, $sp, 0
144+
; CHECK-NEXT: addi.d $a0, $sp, 0
145+
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
146+
; CHECK-NEXT: ld.w $a0, $a0, 0
123147
; CHECK-NEXT: st.w $a0, $a1, 0
148+
; CHECK-NEXT: addi.d $sp, $fp, -64
149+
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
150+
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
151+
; CHECK-NEXT: addi.d $sp, $sp, 64
124152
; CHECK-NEXT: ret
125153
%v = load volatile <8 x i32>, ptr %src
126154
%e = extractelement <8 x i32> %v, i32 %idx
@@ -131,11 +159,21 @@ define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
131159
define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
132160
; CHECK-LABEL: extract_4xi64_idx:
133161
; CHECK: # %bb.0:
134-
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
162+
; CHECK-NEXT: addi.d $sp, $sp, -64
163+
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
164+
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
165+
; CHECK-NEXT: addi.d $fp, $sp, 64
166+
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
135167
; CHECK-NEXT: xvld $xr0, $a0, 0
136-
; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2
137-
; CHECK-NEXT: movfr2gr.d $a0, $fa0
168+
; CHECK-NEXT: xvst $xr0, $sp, 0
169+
; CHECK-NEXT: addi.d $a0, $sp, 0
170+
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
171+
; CHECK-NEXT: ld.d $a0, $a0, 0
138172
; CHECK-NEXT: st.d $a0, $a1, 0
173+
; CHECK-NEXT: addi.d $sp, $fp, -64
174+
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
175+
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
176+
; CHECK-NEXT: addi.d $sp, $sp, 64
139177
; CHECK-NEXT: ret
140178
%v = load volatile <4 x i64>, ptr %src
141179
%e = extractelement <4 x i64> %v, i32 %idx
@@ -146,10 +184,21 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
146184
define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
147185
; CHECK-LABEL: extract_8xfloat_idx:
148186
; CHECK: # %bb.0:
149-
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
187+
; CHECK-NEXT: addi.d $sp, $sp, -64
188+
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
189+
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
190+
; CHECK-NEXT: addi.d $fp, $sp, 64
191+
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
150192
; CHECK-NEXT: xvld $xr0, $a0, 0
151-
; CHECK-NEXT: xvreplve.w $xr0, $xr0, $a2
193+
; CHECK-NEXT: xvst $xr0, $sp, 0
194+
; CHECK-NEXT: addi.d $a0, $sp, 0
195+
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
196+
; CHECK-NEXT: fld.s $fa0, $a0, 0
152197
; CHECK-NEXT: fst.s $fa0, $a1, 0
198+
; CHECK-NEXT: addi.d $sp, $fp, -64
199+
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
200+
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
201+
; CHECK-NEXT: addi.d $sp, $sp, 64
153202
; CHECK-NEXT: ret
154203
%v = load volatile <8 x float>, ptr %src
155204
%e = extractelement <8 x float> %v, i32 %idx
@@ -160,10 +209,21 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
160209
define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
161210
; CHECK-LABEL: extract_4xdouble_idx:
162211
; CHECK: # %bb.0:
163-
; CHECK-NEXT: bstrpick.d $a2, $a2, 31, 0
212+
; CHECK-NEXT: addi.d $sp, $sp, -64
213+
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
214+
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
215+
; CHECK-NEXT: addi.d $fp, $sp, 64
216+
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
164217
; CHECK-NEXT: xvld $xr0, $a0, 0
165-
; CHECK-NEXT: xvreplve.d $xr0, $xr0, $a2
218+
; CHECK-NEXT: xvst $xr0, $sp, 0
219+
; CHECK-NEXT: addi.d $a0, $sp, 0
220+
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
221+
; CHECK-NEXT: fld.d $fa0, $a0, 0
166222
; CHECK-NEXT: fst.d $fa0, $a1, 0
223+
; CHECK-NEXT: addi.d $sp, $fp, -64
224+
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
225+
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
226+
; CHECK-NEXT: addi.d $sp, $sp, 64
167227
; CHECK-NEXT: ret
168228
%v = load volatile <4 x double>, ptr %src
169229
%e = extractelement <4 x double> %v, i32 %idx

0 commit comments

Comments
 (0)