Skip to content

Commit f2aa501

Browse files
committed
[DAGCombiner] Combine vp.strided.store with unit stride to vp.store
This is the VP equivalent of llvm#66677. If we have a strided store where the stride is equal to the element width, we can just use a regular VP store.
1 parent 858b465 commit f2aa501

File tree

3 files changed

+34
-28
lines changed

3 files changed

+34
-28
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,7 @@ namespace {
540540
SDValue visitVPGATHER(SDNode *N);
541541
SDValue visitVPSCATTER(SDNode *N);
542542
SDValue visitVP_STRIDED_LOAD(SDNode *N);
543+
SDValue visitVP_STRIDED_STORE(SDNode *N);
543544
SDValue visitFP_TO_FP16(SDNode *N);
544545
SDValue visitFP16_TO_FP(SDNode *N);
545546
SDValue visitFP_TO_BF16(SDNode *N);
@@ -11873,6 +11874,21 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
1187311874
return SDValue();
1187411875
}
1187511876

11877+
SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
11878+
auto *SST = cast<VPStridedStoreSDNode>(N);
11879+
EVT EltVT = SST->getValue().getValueType().getVectorElementType();
11880+
// Combine strided stores with unit-stride to a regular VP store.
11881+
if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
11882+
CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11883+
return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
11884+
SST->getBasePtr(), SST->getOffset(), SST->getMask(),
11885+
SST->getVectorLength(), SST->getMemoryVT(),
11886+
SST->getMemOperand(), SST->getAddressingMode(),
11887+
SST->isTruncatingStore(), SST->isCompressingStore());
11888+
}
11889+
return SDValue();
11890+
}
11891+
1187611892
SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
1187711893
VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
1187811894
SDValue Mask = MGT->getMask();
@@ -25997,6 +26013,10 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
2599726013
if (SDValue SD = visitVP_STRIDED_LOAD(N))
2599826014
return SD;
2599926015

26016+
if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26017+
if (SDValue SD = visitVP_STRIDED_STORE(N))
26018+
return SD;
26019+
2600026020
// VP operations in which all vector elements are disabled - either by
2600126021
// determining that the mask is all false or that the EVL is 0 - can be
2600226022
// eliminated.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,8 @@ define void @strided_vpstore_v8i8(<8 x i8> %val, ptr %ptr, i32 signext %stride,
8787
define void @strided_vpstore_v8i8_unit_stride(<8 x i8> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
8888
; CHECK-LABEL: strided_vpstore_v8i8_unit_stride:
8989
; CHECK: # %bb.0:
90-
; CHECK-NEXT: li a2, 1
9190
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
92-
; CHECK-NEXT: vsse8.v v8, (a0), a2, v0.t
91+
; CHECK-NEXT: vse8.v v8, (a0), v0.t
9392
; CHECK-NEXT: ret
9493
call void @llvm.experimental.vp.strided.store.v8i8.p0.i32(<8 x i8> %val, ptr %ptr, i32 1, <8 x i1> %m, i32 %evl)
9594
ret void
@@ -134,9 +133,8 @@ define void @strided_vpstore_v8i16(<8 x i16> %val, ptr %ptr, i32 signext %stride
134133
define void @strided_vpstore_v8i16_unit_stride(<8 x i16> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
135134
; CHECK-LABEL: strided_vpstore_v8i16_unit_stride:
136135
; CHECK: # %bb.0:
137-
; CHECK-NEXT: li a2, 2
138136
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
139-
; CHECK-NEXT: vsse16.v v8, (a0), a2, v0.t
137+
; CHECK-NEXT: vse16.v v8, (a0), v0.t
140138
; CHECK-NEXT: ret
141139
call void @llvm.experimental.vp.strided.store.v8i16.p0.i32(<8 x i16> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
142140
ret void
@@ -169,9 +167,8 @@ define void @strided_vpstore_v4i32(<4 x i32> %val, ptr %ptr, i32 signext %stride
169167
define void @strided_vpstore_v4i32_unit_stride(<4 x i32> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
170168
; CHECK-LABEL: strided_vpstore_v4i32_unit_stride:
171169
; CHECK: # %bb.0:
172-
; CHECK-NEXT: li a2, 4
173170
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
174-
; CHECK-NEXT: vsse32.v v8, (a0), a2, v0.t
171+
; CHECK-NEXT: vse32.v v8, (a0), v0.t
175172
; CHECK-NEXT: ret
176173
call void @llvm.experimental.vp.strided.store.v4i32.p0.i32(<4 x i32> %val, ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
177174
ret void
@@ -204,9 +201,8 @@ define void @strided_vpstore_v2i64(<2 x i64> %val, ptr %ptr, i32 signext %stride
204201
define void @strided_vpstore_v2i64_unit_stride(<2 x i64> %val, ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
205202
; CHECK-LABEL: strided_vpstore_v2i64_unit_stride:
206203
; CHECK: # %bb.0:
207-
; CHECK-NEXT: li a2, 8
208204
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
209-
; CHECK-NEXT: vsse64.v v8, (a0), a2, v0.t
205+
; CHECK-NEXT: vse64.v v8, (a0), v0.t
210206
; CHECK-NEXT: ret
211207
call void @llvm.experimental.vp.strided.store.v2i64.p0.i32(<2 x i64> %val, ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
212208
ret void
@@ -275,9 +271,8 @@ define void @strided_vpstore_v8f16(<8 x half> %val, ptr %ptr, i32 signext %strid
275271
define void @strided_vpstore_v8f16_unit_stride(<8 x half> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
276272
; CHECK-LABEL: strided_vpstore_v8f16_unit_stride:
277273
; CHECK: # %bb.0:
278-
; CHECK-NEXT: li a2, 2
279274
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
280-
; CHECK-NEXT: vsse16.v v8, (a0), a2, v0.t
275+
; CHECK-NEXT: vse16.v v8, (a0), v0.t
281276
; CHECK-NEXT: ret
282277
call void @llvm.experimental.vp.strided.store.v8f16.p0.i32(<8 x half> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
283278
ret void
@@ -310,9 +305,8 @@ define void @strided_vpstore_v4f32(<4 x float> %val, ptr %ptr, i32 signext %stri
310305
define void @strided_vpstore_v4f32_unit_stride(<4 x float> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
311306
; CHECK-LABEL: strided_vpstore_v4f32_unit_stride:
312307
; CHECK: # %bb.0:
313-
; CHECK-NEXT: li a2, 4
314308
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
315-
; CHECK-NEXT: vsse32.v v8, (a0), a2, v0.t
309+
; CHECK-NEXT: vse32.v v8, (a0), v0.t
316310
; CHECK-NEXT: ret
317311
call void @llvm.experimental.vp.strided.store.v4f32.p0.i32(<4 x float> %val, ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
318312
ret void
@@ -345,9 +339,8 @@ define void @strided_vpstore_v2f64(<2 x double> %val, ptr %ptr, i32 signext %str
345339
define void @strided_vpstore_v2f64_unit_stride(<2 x double> %val, ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
346340
; CHECK-LABEL: strided_vpstore_v2f64_unit_stride:
347341
; CHECK: # %bb.0:
348-
; CHECK-NEXT: li a2, 8
349342
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
350-
; CHECK-NEXT: vsse64.v v8, (a0), a2, v0.t
343+
; CHECK-NEXT: vse64.v v8, (a0), v0.t
351344
; CHECK-NEXT: ret
352345
call void @llvm.experimental.vp.strided.store.v2f64.p0.i32(<2 x double> %val, ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
353346
ret void

llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,8 @@ define void @strided_vpstore_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr, i32 signex
9999
define void @strided_vpstore_nxv8i8_unit_stride(<vscale x 8 x i8> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
100100
; CHECK-LABEL: strided_vpstore_nxv8i8_unit_stride:
101101
; CHECK: # %bb.0:
102-
; CHECK-NEXT: li a2, 1
103102
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
104-
; CHECK-NEXT: vsse8.v v8, (a0), a2, v0.t
103+
; CHECK-NEXT: vse8.v v8, (a0), v0.t
105104
; CHECK-NEXT: ret
106105
call void @llvm.experimental.vp.strided.store.nxv8i8.p0.i32(<vscale x 8 x i8> %val, ptr %ptr, i32 1, <vscale x 8 x i1> %m, i32 %evl)
107106
ret void
@@ -146,9 +145,8 @@ define void @strided_vpstore_nxv4i16(<vscale x 4 x i16> %val, ptr %ptr, i32 sign
146145
define void @strided_vpstore_nxv4i16_unit_stride(<vscale x 4 x i16> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
147146
; CHECK-LABEL: strided_vpstore_nxv4i16_unit_stride:
148147
; CHECK: # %bb.0:
149-
; CHECK-NEXT: li a2, 2
150148
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
151-
; CHECK-NEXT: vsse16.v v8, (a0), a2, v0.t
149+
; CHECK-NEXT: vse16.v v8, (a0), v0.t
152150
; CHECK-NEXT: ret
153151
call void @llvm.experimental.vp.strided.store.nxv4i16.p0.i32(<vscale x 4 x i16> %val, ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
154152
ret void
@@ -205,9 +203,8 @@ define void @strided_vpstore_nxv4i32(<vscale x 4 x i32> %val, ptr %ptr, i32 sign
205203
define void @strided_vpstore_nxv4i32_unit_stride(<vscale x 4 x i32> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
206204
; CHECK-LABEL: strided_vpstore_nxv4i32_unit_stride:
207205
; CHECK: # %bb.0:
208-
; CHECK-NEXT: li a2, 4
209206
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
210-
; CHECK-NEXT: vsse32.v v8, (a0), a2, v0.t
207+
; CHECK-NEXT: vse32.v v8, (a0), v0.t
211208
; CHECK-NEXT: ret
212209
call void @llvm.experimental.vp.strided.store.nxv4i32.p0.i32(<vscale x 4 x i32> %val, ptr %ptr, i32 4, <vscale x 4 x i1> %m, i32 %evl)
213210
ret void
@@ -240,9 +237,8 @@ define void @strided_vpstore_nxv1i64(<vscale x 1 x i64> %val, ptr %ptr, i32 sign
240237
define void @strided_vpstore_nxv1i64_unit_stride(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
241238
; CHECK-LABEL: strided_vpstore_nxv1i64_unit_stride:
242239
; CHECK: # %bb.0:
243-
; CHECK-NEXT: li a2, 8
244240
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
245-
; CHECK-NEXT: vsse64.v v8, (a0), a2, v0.t
241+
; CHECK-NEXT: vse64.v v8, (a0), v0.t
246242
; CHECK-NEXT: ret
247243
call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i32(<vscale x 1 x i64> %val, ptr %ptr, i32 8, <vscale x 1 x i1> %m, i32 %evl)
248244
ret void
@@ -323,9 +319,8 @@ define void @strided_vpstore_nxv4f16(<vscale x 4 x half> %val, ptr %ptr, i32 sig
323319
define void @strided_vpstore_nxv4f16_unit_stride(<vscale x 4 x half> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
324320
; CHECK-LABEL: strided_vpstore_nxv4f16_unit_stride:
325321
; CHECK: # %bb.0:
326-
; CHECK-NEXT: li a2, 2
327322
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
328-
; CHECK-NEXT: vsse16.v v8, (a0), a2, v0.t
323+
; CHECK-NEXT: vse16.v v8, (a0), v0.t
329324
; CHECK-NEXT: ret
330325
call void @llvm.experimental.vp.strided.store.nxv4f16.p0.i32(<vscale x 4 x half> %val, ptr %ptr, i32 2, <vscale x 4 x i1> %m, i32 %evl)
331326
ret void
@@ -382,9 +377,8 @@ define void @strided_vpstore_nxv4f32(<vscale x 4 x float> %val, ptr %ptr, i32 si
382377
define void @strided_vpstore_nxv4f32_unit_stride(<vscale x 4 x float> %val, ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
383378
; CHECK-LABEL: strided_vpstore_nxv4f32_unit_stride:
384379
; CHECK: # %bb.0:
385-
; CHECK-NEXT: li a2, 4
386380
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
387-
; CHECK-NEXT: vsse32.v v8, (a0), a2, v0.t
381+
; CHECK-NEXT: vse32.v v8, (a0), v0.t
388382
; CHECK-NEXT: ret
389383
call void @llvm.experimental.vp.strided.store.nxv4f32.p0.i32(<vscale x 4 x float> %val, ptr %ptr, i32 4, <vscale x 4 x i1> %m, i32 %evl)
390384
ret void
@@ -417,9 +411,8 @@ define void @strided_vpstore_nxv1f64(<vscale x 1 x double> %val, ptr %ptr, i32 s
417411
define void @strided_vpstore_nxv1f64_unit_stride(<vscale x 1 x double> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
418412
; CHECK-LABEL: strided_vpstore_nxv1f64_unit_stride:
419413
; CHECK: # %bb.0:
420-
; CHECK-NEXT: li a2, 8
421414
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
422-
; CHECK-NEXT: vsse64.v v8, (a0), a2, v0.t
415+
; CHECK-NEXT: vse64.v v8, (a0), v0.t
423416
; CHECK-NEXT: ret
424417
call void @llvm.experimental.vp.strided.store.nxv1f64.p0.i32(<vscale x 1 x double> %val, ptr %ptr, i32 8, <vscale x 1 x i1> %m, i32 %evl)
425418
ret void

0 commit comments

Comments
 (0)