Commit e893242
[RISCV] Don't combine store of vmv.x.s/vfmv.f.s to vp_store with VL of 1 when it's an indexed store (llvm#73219)

An indexed store cannot be combined this way, because the resulting vp_store is later lowered to the vse intrinsic, which cannot express an indexed address mode.
Parent: 6318dd8
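For context, a minimal sketch of the combine this guard applies to, simplified from the RISCVISelLowering.cpp hunk below (Store, Val, Src, and MemVT are the variables visible in that hunk; the elided body is paraphrased, not the exact upstream code):

// A scalar store of an element read out with vmv.x.s/vfmv.f.s can be
// rewritten as a vp_store of the whole source vector with VL = 1.
SDValue Src = Val.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
if (!Store->isIndexed() &&      // indexed stores cannot be lowered to vse
    VecVT.isScalableVector() && MemVT == VecVT.getVectorElementType()) {
  // ... build the VL=1 vp_store of Src here; it is later lowered to the
  // vse intrinsic, which has no indexed (pre/post-increment) form ...
}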

File tree: 2 files changed, +79 −1 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 1 deletion
@@ -15262,7 +15262,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     SDValue Src = Val.getOperand(0);
     MVT VecVT = Src.getSimpleValueType();
     // VecVT should be scalable and memory VT should match the element type.
-    if (VecVT.isScalableVector() &&
+    if (!Store->isIndexed() && VecVT.isScalableVector() &&
         MemVT == VecVT.getVectorElementType()) {
       SDLoc DL(N);
       MVT MaskVT = getMaskTypeFor(VecVT);
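In SelectionDAG terms, an "indexed" store is one using a pre- or post-increment addressing mode; the th.swia in the new test below is selected from exactly such a post-increment store. A minimal sketch of the check using the standard LSBaseSDNode queries (the helper name is hypothetical, not part of the patch):

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Hypothetical helper: the combine to a VL=1 vp_store is only safe for
// plain stores. ISD::PRE_INC, ISD::PRE_DEC, ISD::POST_INC and ISD::POST_DEC
// are all rejected, since only ISD::UNINDEXED survives the later lowering
// to the vse intrinsic.
static bool safeToCombineToVPStore(const llvm::StoreSDNode *Store) {
  // Equivalent to !Store->isIndexed().
  return Store->getAddressingMode() == llvm::ISD::UNINDEXED;
}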
Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv32 -mattr=+v,+xtheadmemidx,+xtheadmempair -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+xtheadmemidx,+xtheadmempair -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix RV64

define void @test(ptr %ref_array, ptr %sad_array) {
; RV32-LABEL: test:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    th.lwd a2, a3, (a0), 0, 3
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vle8.v v8, (a2)
; RV32-NEXT:    vmv.v.i v9, 0
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf4 v12, v8
; RV32-NEXT:    vmv.s.x v8, zero
; RV32-NEXT:    vredsum.vs v10, v12, v8
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    th.swia a0, (a1), 4, 0
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vle8.v v10, (a3)
; RV32-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
; RV32-NEXT:    vslideup.vi v10, v9, 4
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vredsum.vs v8, v12, v8
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vse32.v v8, (a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: test:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    th.ldd a2, a3, (a0), 0, 4
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vle8.v v8, (a2)
; RV64-NEXT:    vmv.v.i v9, 0
; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf4 v12, v8
; RV64-NEXT:    vmv.s.x v8, zero
; RV64-NEXT:    vredsum.vs v10, v12, v8
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    th.swia a0, (a1), 4, 0
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vle8.v v10, (a3)
; RV64-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
; RV64-NEXT:    vslideup.vi v10, v9, 4
; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf4 v12, v10
; RV64-NEXT:    vredsum.vs v8, v12, v8
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vse32.v v8, (a1)
; RV64-NEXT:    ret
entry:
  %0 = load ptr, ptr %ref_array, align 8
  %1 = load <4 x i8>, ptr %0, align 1
  %2 = shufflevector <4 x i8> %1, <4 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %3 = zext <16 x i8> %2 to <16 x i32>
  %4 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3)
  store i32 %4, ptr %sad_array, align 4, !tbaa !0
  %arrayidx.1 = getelementptr ptr, ptr %ref_array, i64 1
  %5 = load ptr, ptr %arrayidx.1, align 8, !tbaa !4
  %6 = load <4 x i8>, ptr %5, align 1
  %7 = shufflevector <4 x i8> %6, <4 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %8 = zext <16 x i8> %7 to <16 x i32>
  %9 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %8)
  %arrayidx2.1 = getelementptr i32, ptr %sad_array, i64 1
  store i32 %9, ptr %arrayidx2.1, align 4
  ret void
}

declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)

!0 = !{!1, !1, i64 0}
!1 = !{!"int", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}
!4 = !{!5, !5, i64 0}
!5 = !{!"any pointer", !2, i64 0}
