Skip to content

Commit eeba70a

Browse files
author
Simon Moll
committed
[VE] Expand single-element BUILD_VECTOR to INSERT_VECTOR_ELT
We do this mostly to be able to test the insert_vector_elt isel patterns. Without this expansion, most single-element insertions show up as `BUILD_VECTOR` in the backend. Reviewed By: kaz7. Differential Revision: https://reviews.llvm.org/D93759
1 parent d1b606f commit eeba70a

File tree

3 files changed

+79
-36
lines changed

3 files changed

+79
-36
lines changed

llvm/lib/Target/VE/VEISelLowering.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,6 +1602,32 @@ SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
16021602
}
16031603
}
16041604

1605+
/// Check whether \p N is a BUILD_VECTOR with exactly one non-undef operand.
///
/// \param N          The node to inspect.
/// \param UniqueIdx  Receives the operand index of the first non-undef
///                   operand. It is written as soon as that operand is found,
///                   so it is only meaningful when this function returns true.
///                   It is left untouched if \p N is not a BuildVectorSDNode
///                   or if every operand is undef.
/// \returns true iff \p N is a BuildVectorSDNode and precisely one of its
///          operands is not undef.
static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
  // A single dyn_cast replaces the isa<>/cast<> pair.
  const auto *BVN = dyn_cast<BuildVectorSDNode>(N);
  if (!BVN)
    return false;

  // One pass over the operands: remember the first non-undef operand and bail
  // out on a second one. If none is found (the hypothetical all-undef case),
  // FoundInsertion stays false.
  bool FoundInsertion = false;
  for (unsigned Idx = 0, NumOps = BVN->getNumOperands(); Idx != NumOps;
       ++Idx) {
    if (BVN->getOperand(Idx)->isUndef())
      continue;
    // A second non-undef operand means the insertion is not unique.
    if (FoundInsertion)
      return false;
    // Remember insertion.
    UniqueIdx = Idx;
    FoundInsertion = true;
  }
  return FoundInsertion;
}
1630+
16051631
static SDValue getSplatValue(SDNode *N) {
16061632
if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
16071633
return BuildVec->getSplatValue();
@@ -1615,6 +1641,17 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
16151641
unsigned NumEls = Op.getValueType().getVectorNumElements();
16161642
MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
16171643

1644+
// If there is just one element, expand to INSERT_VECTOR_ELT.
1645+
unsigned UniqueIdx;
1646+
if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1647+
SDValue AccuV = DAG.getUNDEF(Op.getValueType());
1648+
auto ElemV = Op->getOperand(UniqueIdx);
1649+
SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64);
1650+
return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV,
1651+
ElemV, IdxV);
1652+
}
1653+
1654+
// Else emit a broadcast.
16181655
if (SDValue ScalarV = getSplatValue(Op.getNode())) {
16191656
// lower to VEC_BROADCAST
16201657
MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
2+
3+
; Function Attrs: norecurse nounwind readnone
4+
; Check that a single-element insertion is lowered to an insert_vector_elt node for isel.
5+
define fastcc <256 x i32> @expand_single_elem_build_vec(i32 %x, i32 %y) {
6+
; CHECK-LABEL: expand_single_elem_build_vec:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: and %s0, %s0, (32)0
9+
; CHECK-NEXT: lsv %v0(42), %s0
10+
; CHECK-NEXT: b.l.t (, %s10)
11+
%r = insertelement <256 x i32> undef, i32 %x, i32 42
12+
ret <256 x i32> %r
13+
}

llvm/test/CodeGen/VE/Vector/insert_elt.ll

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) {
1515
define fastcc <256 x i64> @insert_ri7_v256i64(i64 %s) {
1616
; CHECK-LABEL: insert_ri7_v256i64:
1717
; CHECK: # %bb.0:
18-
; CHECK-NEXT: lea %s0, 256
19-
; CHECK-NEXT: lvl %s0
20-
; CHECK-NEXT: vbrd %v0, %s0
18+
; CHECK-NEXT: lsv %v0(127), %s0
2119
; CHECK-NEXT: b.l.t (, %s10)
2220
%ret = insertelement <256 x i64> undef, i64 %s, i32 127
2321
ret <256 x i64> %ret
@@ -26,9 +24,8 @@ define fastcc <256 x i64> @insert_ri7_v256i64(i64 %s) {
2624
define fastcc <256 x i64> @insert_ri8_v256i64(i64 %s) {
2725
; CHECK-LABEL: insert_ri8_v256i64:
2826
; CHECK: # %bb.0:
29-
; CHECK-NEXT: lea %s0, 256
30-
; CHECK-NEXT: lvl %s0
31-
; CHECK-NEXT: vbrd %v0, %s0
27+
; CHECK-NEXT: lea %s1, 128
28+
; CHECK-NEXT: lsv %v0(%s1), %s0
3229
; CHECK-NEXT: b.l.t (, %s10)
3330
%ret = insertelement <256 x i64> undef, i64 %s, i32 128
3431
ret <256 x i64> %ret
@@ -37,9 +34,7 @@ define fastcc <256 x i64> @insert_ri8_v256i64(i64 %s) {
3734
define fastcc <512 x i64> @insert_ri_v512i64(i64 %s) {
3835
; CHECK-LABEL: insert_ri_v512i64:
3936
; CHECK: # %bb.0:
40-
; CHECK-NEXT: lea %s0, 256
41-
; CHECK-NEXT: lvl %s0
42-
; CHECK-NEXT: vbrd %v1, %s0
37+
; CHECK-NEXT: lsv %v1(116), %s0
4338
; CHECK-NEXT: b.l.t (, %s10)
4439
%ret = insertelement <512 x i64> undef, i64 %s, i32 372
4540
ret <512 x i64> %ret
@@ -60,9 +55,8 @@ define fastcc <256 x i32> @insert_rr_v256i32(i32 signext %idx, i32 signext %s) {
6055
define fastcc <256 x i32> @insert_ri7_v256i32(i32 signext %s) {
6156
; CHECK-LABEL: insert_ri7_v256i32:
6257
; CHECK: # %bb.0:
63-
; CHECK-NEXT: lea %s0, 256
64-
; CHECK-NEXT: lvl %s0
65-
; CHECK-NEXT: vbrd %v0, %s0
58+
; CHECK-NEXT: and %s0, %s0, (32)0
59+
; CHECK-NEXT: lsv %v0(127), %s0
6660
; CHECK-NEXT: b.l.t (, %s10)
6761
%ret = insertelement <256 x i32> undef, i32 %s, i32 127
6862
ret <256 x i32> %ret
@@ -71,9 +65,9 @@ define fastcc <256 x i32> @insert_ri7_v256i32(i32 signext %s) {
7165
define fastcc <256 x i32> @insert_ri8_v256i32(i32 signext %s) {
7266
; CHECK-LABEL: insert_ri8_v256i32:
7367
; CHECK: # %bb.0:
74-
; CHECK-NEXT: lea %s0, 256
75-
; CHECK-NEXT: lvl %s0
76-
; CHECK-NEXT: vbrd %v0, %s0
68+
; CHECK-NEXT: and %s0, %s0, (32)0
69+
; CHECK-NEXT: lea %s1, 128
70+
; CHECK-NEXT: lsv %v0(%s1), %s0
7771
; CHECK-NEXT: b.l.t (, %s10)
7872
%ret = insertelement <256 x i32> undef, i32 %s, i32 128
7973
ret <256 x i32> %ret
@@ -82,9 +76,12 @@ define fastcc <256 x i32> @insert_ri8_v256i32(i32 signext %s) {
8276
define fastcc <512 x i32> @insert_ri_v512i32(i32 signext %s) {
8377
; CHECK-LABEL: insert_ri_v512i32:
8478
; CHECK: # %bb.0:
85-
; CHECK-NEXT: lea %s0, 512
86-
; CHECK-NEXT: lvl %s0
87-
; CHECK-NEXT: vbrd %v0, %s0
79+
; CHECK-NEXT: lea %s1, 186
80+
; CHECK-NEXT: lvs %s2, %v0(%s1)
81+
; CHECK-NEXT: and %s2, %s2, (32)0
82+
; CHECK-NEXT: sll %s0, %s0, 32
83+
; CHECK-NEXT: or %s0, %s2, %s0
84+
; CHECK-NEXT: lsv %v0(%s1), %s0
8885
; CHECK-NEXT: b.l.t (, %s10)
8986
%ret = insertelement <512 x i32> undef, i32 %s, i32 372
9087
ret <512 x i32> %ret
@@ -122,9 +119,7 @@ define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) {
122119
define fastcc <256 x double> @insert_ri7_v256f64(double %s) {
123120
; CHECK-LABEL: insert_ri7_v256f64:
124121
; CHECK: # %bb.0:
125-
; CHECK-NEXT: lea %s0, 256
126-
; CHECK-NEXT: lvl %s0
127-
; CHECK-NEXT: vbrd %v0, %s0
122+
; CHECK-NEXT: lsv %v0(127), %s0
128123
; CHECK-NEXT: b.l.t (, %s10)
129124
%ret = insertelement <256 x double> undef, double %s, i32 127
130125
ret <256 x double> %ret
@@ -133,9 +128,8 @@ define fastcc <256 x double> @insert_ri7_v256f64(double %s) {
133128
define fastcc <256 x double> @insert_ri8_v256f64(double %s) {
134129
; CHECK-LABEL: insert_ri8_v256f64:
135130
; CHECK: # %bb.0:
136-
; CHECK-NEXT: lea %s0, 256
137-
; CHECK-NEXT: lvl %s0
138-
; CHECK-NEXT: vbrd %v0, %s0
131+
; CHECK-NEXT: lea %s1, 128
132+
; CHECK-NEXT: lsv %v0(%s1), %s0
139133
; CHECK-NEXT: b.l.t (, %s10)
140134
%ret = insertelement <256 x double> undef, double %s, i32 128
141135
ret <256 x double> %ret
@@ -144,9 +138,7 @@ define fastcc <256 x double> @insert_ri8_v256f64(double %s) {
144138
define fastcc <512 x double> @insert_ri_v512f64(double %s) {
145139
; CHECK-LABEL: insert_ri_v512f64:
146140
; CHECK: # %bb.0:
147-
; CHECK-NEXT: lea %s0, 256
148-
; CHECK-NEXT: lvl %s0
149-
; CHECK-NEXT: vbrd %v1, %s0
141+
; CHECK-NEXT: lsv %v1(116), %s0
150142
; CHECK-NEXT: b.l.t (, %s10)
151143
%ret = insertelement <512 x double> undef, double %s, i32 372
152144
ret <512 x double> %ret
@@ -166,9 +158,7 @@ define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) {
166158
define fastcc <256 x float> @insert_ri7_v256f32(float %s) {
167159
; CHECK-LABEL: insert_ri7_v256f32:
168160
; CHECK: # %bb.0:
169-
; CHECK-NEXT: lea %s0, 256
170-
; CHECK-NEXT: lvl %s0
171-
; CHECK-NEXT: vbrd %v0, %s0
161+
; CHECK-NEXT: lsv %v0(127), %s0
172162
; CHECK-NEXT: b.l.t (, %s10)
173163
%ret = insertelement <256 x float> undef, float %s, i32 127
174164
ret <256 x float> %ret
@@ -177,9 +167,8 @@ define fastcc <256 x float> @insert_ri7_v256f32(float %s) {
177167
define fastcc <256 x float> @insert_ri8_v256f32(float %s) {
178168
; CHECK-LABEL: insert_ri8_v256f32:
179169
; CHECK: # %bb.0:
180-
; CHECK-NEXT: lea %s0, 256
181-
; CHECK-NEXT: lvl %s0
182-
; CHECK-NEXT: vbrd %v0, %s0
170+
; CHECK-NEXT: lea %s1, 128
171+
; CHECK-NEXT: lsv %v0(%s1), %s0
183172
; CHECK-NEXT: b.l.t (, %s10)
184173
%ret = insertelement <256 x float> undef, float %s, i32 128
185174
ret <256 x float> %ret
@@ -188,9 +177,13 @@ define fastcc <256 x float> @insert_ri8_v256f32(float %s) {
188177
define fastcc <512 x float> @insert_ri_v512f32(float %s) {
189178
; CHECK-LABEL: insert_ri_v512f32:
190179
; CHECK: # %bb.0:
191-
; CHECK-NEXT: lea %s0, 512
192-
; CHECK-NEXT: lvl %s0
193-
; CHECK-NEXT: vbrd %v0, %s0
180+
; CHECK-NEXT: sra.l %s0, %s0, 32
181+
; CHECK-NEXT: lea %s1, 186
182+
; CHECK-NEXT: lvs %s2, %v0(%s1)
183+
; CHECK-NEXT: and %s2, %s2, (32)0
184+
; CHECK-NEXT: sll %s0, %s0, 32
185+
; CHECK-NEXT: or %s0, %s2, %s0
186+
; CHECK-NEXT: lsv %v0(%s1), %s0
194187
; CHECK-NEXT: b.l.t (, %s10)
195188
%ret = insertelement <512 x float> undef, float %s, i32 372
196189
ret <512 x float> %ret

0 commit comments

Comments
 (0)