Skip to content

Commit 67b71ad

Browse files
committed
[DAG] Fold insert_subvector undef, (extract_subvector X, 0), 0 with non-matching types
We have an existing DAG combine for the case where an insert/extract subvector pair is entirely a no-op, but we did not handle the case where the net result is a single insert or a single extract (but not both). The transform is restricted to index 0 to avoid having to adjust indices after the transform. Differential Revision: https://reviews.llvm.org/D158201
1 parent 630ba7d commit 67b71ad

30 files changed

+7021
-6674
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25597,10 +25597,25 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2559725597
return N0;
2559825598

2559925599
// If this is an insert of an extracted vector into an undef vector, we can
25600-
// just use the input to the extract.
25600+
// just use the input to the extract if the types match, and can simplify
25601+
// in some cases even if they don't.
2560125602
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
25602-
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
25603-
return N1.getOperand(0);
25603+
N1.getOperand(1) == N2) {
25604+
EVT SrcVT = N1.getOperand(0).getValueType();
25605+
if (SrcVT == VT)
25606+
return N1.getOperand(0);
25607+
// TODO: To remove the zero check, need to adjust the offset to
25608+
// a multiple of the new src type.
25609+
if (isNullConstant(N2) &&
25610+
VT.isScalableVector() == SrcVT.isScalableVector()) {
25611+
if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
25612+
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
25613+
VT, N0, N1.getOperand(0), N2);
25614+
else
25615+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
25616+
VT, N1.getOperand(0), N2);
25617+
}
25618+
}
2560425619

2560525620
// Simplify scalar inserts into an undef vector:
2560625621
// insert_subvector undef, (splat X), N2 -> splat X

llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,15 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) {
103103
define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
104104
; CHECK-LABEL: fv32:
105105
; CHECK: # %bb.0:
106+
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
106107
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
107108
; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0)
108-
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
109109
; CHECK-NEXT: vle64.v v8, (a0)
110+
; CHECK-NEXT: vid.v v16
111+
; CHECK-NEXT: vsaddu.vx v16, v16, a1
112+
; CHECK-NEXT: vmsltu.vx v0, v16, a2
110113
; CHECK-NEXT: vsaddu.vx v8, v8, a1
111114
; CHECK-NEXT: vmsltu.vx v16, v8, a2
112-
; CHECK-NEXT: vid.v v8
113-
; CHECK-NEXT: vsaddu.vx v8, v8, a1
114-
; CHECK-NEXT: vmsltu.vx v0, v8, a2
115115
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
116116
; CHECK-NEXT: vslideup.vi v0, v16, 2
117117
; CHECK-NEXT: ret
@@ -122,15 +122,15 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
122122
define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
123123
; CHECK-LABEL: fv64:
124124
; CHECK: # %bb.0:
125+
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
125126
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
126127
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
127-
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
128128
; CHECK-NEXT: vle64.v v8, (a0)
129+
; CHECK-NEXT: vid.v v16
130+
; CHECK-NEXT: vsaddu.vx v16, v16, a1
131+
; CHECK-NEXT: vmsltu.vx v0, v16, a2
129132
; CHECK-NEXT: vsaddu.vx v8, v8, a1
130133
; CHECK-NEXT: vmsltu.vx v16, v8, a2
131-
; CHECK-NEXT: vid.v v8
132-
; CHECK-NEXT: vsaddu.vx v8, v8, a1
133-
; CHECK-NEXT: vmsltu.vx v0, v8, a2
134134
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
135135
; CHECK-NEXT: vslideup.vi v0, v16, 2
136136
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
@@ -157,15 +157,15 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
157157
define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
158158
; CHECK-LABEL: fv128:
159159
; CHECK: # %bb.0:
160+
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
160161
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
161162
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0)
162-
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
163163
; CHECK-NEXT: vle64.v v8, (a0)
164+
; CHECK-NEXT: vid.v v16
165+
; CHECK-NEXT: vsaddu.vx v16, v16, a1
166+
; CHECK-NEXT: vmsltu.vx v0, v16, a2
164167
; CHECK-NEXT: vsaddu.vx v8, v8, a1
165168
; CHECK-NEXT: vmsltu.vx v16, v8, a2
166-
; CHECK-NEXT: vid.v v8
167-
; CHECK-NEXT: vsaddu.vx v8, v8, a1
168-
; CHECK-NEXT: vmsltu.vx v0, v8, a2
169169
; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma
170170
; CHECK-NEXT: vslideup.vi v0, v16, 2
171171
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)

llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -469,13 +469,14 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
469469
; CHECK: # %bb.0:
470470
; CHECK-NEXT: csrr a0, vlenb
471471
; CHECK-NEXT: srli a0, a0, 2
472+
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
473+
; CHECK-NEXT: vslidedown.vx v13, v10, a0
472474
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
473-
; CHECK-NEXT: vslidedown.vx v8, v9, a0
475+
; CHECK-NEXT: vslidedown.vx v12, v9, a0
474476
; CHECK-NEXT: add a1, a0, a0
475477
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
476-
; CHECK-NEXT: vslideup.vx v8, v10, a0
477-
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
478-
; CHECK-NEXT: vslidedown.vx v9, v10, a0
478+
; CHECK-NEXT: vslideup.vx v12, v10, a0
479+
; CHECK-NEXT: vmv2r.v v8, v12
479480
; CHECK-NEXT: ret
480481
%res = call <vscale x 6 x half> @llvm.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
481482
ret <vscale x 6 x half> %res

0 commit comments

Comments
 (0)