Skip to content

Commit 0457f50

Browse files
committed
[RISCV] Implement storeOfVectorConstantIsCheap hook to prevent store merging at VL=2
In general, VL=2 vectors are very questionable profitability-wise. For constants specifically, our inability to materialize many vector constants cheaply biases us strongly towards unprofitability at VL=2. This hook is very close to the X86 implementation. The difference is that X86 whitelists stores of zeros, whereas we're better off letting stores of zeros stay scalar at VL=2. Differential Revision: https://reviews.llvm.org/D150798
1 parent 6c59f39 commit 0457f50

File tree

2 files changed

+26
-34
lines changed

2 files changed

+26
-34
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,13 @@ class RISCVTargetLowering : public TargetLowering {
531531
return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
532532
}
533533

534+
/// Hook override reporting whether a store of a vector constant with
/// \p NumElem elements should be treated as cheap; this commit adds it to
/// keep scalar constant stores from being merged into a VL=2 vector store.
///
/// Only \p NumElem is consulted: the result is true exactly when
/// NumElem >= 4. \p IsZero, \p MemVT and \p AddrSpace are accepted to match
/// the overridden signature but are not read, so stores of zero get no
/// special treatment here.
bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
535+
unsigned AddrSpace) const override {
536+
// If we can replace 4 or more scalar stores, there will be a reduction
537+
// in instructions even after we add a vector constant load.
538+
return NumElem >= 4;
539+
}
540+
534541
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
535542
return VT.isScalarInteger();
536543
}

llvm/test/CodeGen/RISCV/rvv/combine-store.ll

Lines changed: 19 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,8 @@ define void @combine_zero_stores_4xi8(ptr %p) {
3131
define void @combine_zero_stores_8xi8(ptr %p) {
3232
; RV32-LABEL: combine_zero_stores_8xi8:
3333
; RV32: # %bb.0:
34-
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
35-
; RV32-NEXT: vmv.v.i v8, 0
36-
; RV32-NEXT: vse32.v v8, (a0)
34+
; RV32-NEXT: sw zero, 0(a0)
35+
; RV32-NEXT: sw zero, 4(a0)
3736
; RV32-NEXT: ret
3837
;
3938
; RV64-LABEL: combine_zero_stores_8xi8:
@@ -72,9 +71,8 @@ define void @combine_zero_stores_2xi16(ptr %p) {
7271
define void @combine_zero_stores_4xi16(ptr %p) {
7372
; RV32-LABEL: combine_zero_stores_4xi16:
7473
; RV32: # %bb.0:
75-
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
76-
; RV32-NEXT: vmv.v.i v8, 0
77-
; RV32-NEXT: vse32.v v8, (a0)
74+
; RV32-NEXT: sw zero, 0(a0)
75+
; RV32-NEXT: sw zero, 4(a0)
7876
; RV32-NEXT: ret
7977
;
8078
; RV64-LABEL: combine_zero_stores_4xi16:
@@ -104,9 +102,8 @@ define void @combine_zero_stores_8xi16(ptr %p) {
104102
;
105103
; RV64-LABEL: combine_zero_stores_8xi16:
106104
; RV64: # %bb.0:
107-
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
108-
; RV64-NEXT: vmv.v.i v8, 0
109-
; RV64-NEXT: vse64.v v8, (a0)
105+
; RV64-NEXT: sd zero, 0(a0)
106+
; RV64-NEXT: sd zero, 8(a0)
110107
; RV64-NEXT: ret
111108
store i16 zeroinitializer, ptr %p, align 16
112109
%gep1 = getelementptr i16, ptr %p, i64 1
@@ -129,9 +126,8 @@ define void @combine_zero_stores_8xi16(ptr %p) {
129126
define void @combine_zero_stores_2xi32(ptr %p) {
130127
; RV32-LABEL: combine_zero_stores_2xi32:
131128
; RV32: # %bb.0:
132-
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
133-
; RV32-NEXT: vmv.v.i v8, 0
134-
; RV32-NEXT: vse32.v v8, (a0)
129+
; RV32-NEXT: sw zero, 0(a0)
130+
; RV32-NEXT: sw zero, 4(a0)
135131
; RV32-NEXT: ret
136132
;
137133
; RV64-LABEL: combine_zero_stores_2xi32:
@@ -154,9 +150,8 @@ define void @combine_zero_stores_4xi32(ptr %p) {
154150
;
155151
; RV64-LABEL: combine_zero_stores_4xi32:
156152
; RV64: # %bb.0:
157-
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
158-
; RV64-NEXT: vmv.v.i v8, 0
159-
; RV64-NEXT: vse64.v v8, (a0)
153+
; RV64-NEXT: sd zero, 0(a0)
154+
; RV64-NEXT: sd zero, 8(a0)
160155
; RV64-NEXT: ret
161156
store i32 zeroinitializer, ptr %p, align 16
162157
%gep1 = getelementptr i32, ptr %p, i64 1
@@ -201,18 +196,11 @@ define void @combine_zero_stores_8xi32(ptr %p) {
201196
}
202197

203198
define void @combine_zero_stores_2xi32_unaligned(ptr %p) {
204-
; RV32-LABEL: combine_zero_stores_2xi32_unaligned:
205-
; RV32: # %bb.0:
206-
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
207-
; RV32-NEXT: vmv.v.i v8, 0
208-
; RV32-NEXT: vse32.v v8, (a0)
209-
; RV32-NEXT: ret
210-
;
211-
; RV64-LABEL: combine_zero_stores_2xi32_unaligned:
212-
; RV64: # %bb.0:
213-
; RV64-NEXT: sw zero, 0(a0)
214-
; RV64-NEXT: sw zero, 4(a0)
215-
; RV64-NEXT: ret
199+
; CHECK-LABEL: combine_zero_stores_2xi32_unaligned:
200+
; CHECK: # %bb.0:
201+
; CHECK-NEXT: sw zero, 0(a0)
202+
; CHECK-NEXT: sw zero, 4(a0)
203+
; CHECK-NEXT: ret
216204
store i32 zeroinitializer, ptr %p
217205
%gep = getelementptr i8, ptr %p, i64 4
218206
store i32 zeroinitializer, ptr %gep
@@ -230,9 +218,8 @@ define void @combine_zero_stores_2xi64(ptr %p) {
230218
;
231219
; RV64-LABEL: combine_zero_stores_2xi64:
232220
; RV64: # %bb.0:
233-
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
234-
; RV64-NEXT: vmv.v.i v8, 0
235-
; RV64-NEXT: vse64.v v8, (a0)
221+
; RV64-NEXT: sd zero, 0(a0)
222+
; RV64-NEXT: sd zero, 8(a0)
236223
; RV64-NEXT: ret
237224
store i64 zeroinitializer, ptr %p
238225
%gep = getelementptr i8, ptr %p, i64 8
@@ -243,10 +230,8 @@ define void @combine_zero_stores_2xi64(ptr %p) {
243230
define void @combine_fp_zero_stores_crash(ptr %ptr) {
244231
; CHECK-LABEL: combine_fp_zero_stores_crash:
245232
; CHECK: # %bb.0:
246-
; CHECK-NEXT: addi a0, a0, 4
247-
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
248-
; CHECK-NEXT: vmv.v.i v8, 0
249-
; CHECK-NEXT: vse32.v v8, (a0)
233+
; CHECK-NEXT: sw zero, 4(a0)
234+
; CHECK-NEXT: sw zero, 8(a0)
250235
; CHECK-NEXT: ret
251236
%addr1 = getelementptr float, ptr %ptr, i64 1
252237
%addr2 = getelementptr float, ptr %ptr, i64 2

0 commit comments

Comments
 (0)