[RISCV] Don't fold vmerge.vvm or vmv.v.v into vredsum.vs if AVL changed (#99006)
When folding, we currently check whether the pseudo's result is not computed lanewise
(e.g. vredsum.vs or viota.m) and bail if we're changing the mask.
However, we also need to check the AVL: for these pseudos, changing the
number of active elements changes the result in the body, not just the tail.
This patch bails out if the AVL changed for these pseudos, and also renames
the pseudo table property to be more explicit.
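
As a rough illustration of the extra condition, here is a minimal standalone sketch of the kind of check the fold now performs. It is not the actual LLVM code: the names PseudoInfo, ActiveElementsAffectResult, AVL and canFold are illustrative placeholders for the real table field and helpers in the RISC-V backend.

// Sketch only: fold of a vmv.v.v/vmerge.vvm into its producing pseudo must
// bail when either the mask *or* the AVL would change for pseudos whose
// result is not lanewise (e.g. vredsum.vs, viota.m).
#include <cstdint>

struct AVL {
  bool IsVLMax;   // AVL is "all elements" (vsetvli with x0/x0 style)
  uint64_t Imm;   // concrete AVL value when !IsVLMax
  bool operator==(const AVL &O) const {
    return IsVLMax == O.IsVLMax && Imm == O.Imm;
  }
};

struct PseudoInfo {
  // True for pseudos like vredsum.vs or viota.m, where the set of active
  // elements affects every element of the result, not just the tail.
  bool ActiveElementsAffectResult;
};

bool canFold(const PseudoInfo &Info, bool MaskChanges, const AVL &OldAVL,
             const AVL &NewAVL) {
  if (Info.ActiveElementsAffectResult) {
    if (MaskChanges)
      return false;           // previously the only check
    if (!(OldAVL == NewAVL))
      return false;           // new: a different AVL changes the result
  }
  return true;
}

The new unfoldable_vredsum test below exercises exactly the second branch: the vredsum.vs runs at VLMAX while the vmv.v.v uses an AVL of 1, so the copy must stay separate.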
llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll: 52 additions & 2 deletions
@@ -19,6 +19,17 @@ define <vscale x 4 x i32> @vadd(<vscale x 4 x i32> %passthru, <vscale x 4 x i32>
   ret <vscale x 4 x i32> %w
 }
 
+define <vscale x 4 x i32> @vadd_mask(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, iXLen %vl) {
+; CHECK-LABEL: vadd_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.mask.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, iXLen %vl, iXLen 3)
+  %w = call <vscale x 4 x i32> @llvm.riscv.vmv.v.v.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, iXLen %vl)
+  ret <vscale x 4 x i32> %w
+}
+
 define <vscale x 4 x i32> @vadd_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
 ; CHECK-LABEL: vadd_undef:
 ; CHECK:       # %bb.0:
@@ -106,8 +117,8 @@ declare <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float>, <
 
 declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, iXLen, iXLen)
 
-define <vscale x 4 x float> @vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl1, iXLen %vl2) {
-; CHECK-LABEL: vfadd:
+define <vscale x 4 x float> @unfoldable_vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: unfoldable_vfadd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT:    vfadd.vv v10, v10, v12
@@ -118,3 +129,42 @@ define <vscale x 4 x float> @vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x
   %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl2)
   ret <vscale x 4 x float> %w
 }
+
+define <vscale x 4 x float> @foldable_vfadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; CHECK-LABEL: foldable_vfadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    vfadd.vv v8, v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
+  %w = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %v, iXLen %vl)
+  ret <vscale x 4 x float> %w
+}
+
+; This shouldn't be folded because we need to preserve exceptions with
+; "fpexcept.strict" exception behaviour, and changing the VL may hide them.
+define <vscale x 4 x float> @unfoldable_constrained_fadd(<vscale x 4 x float> %passthru, <vscale x 4 x float> %x, <vscale x 4 x float> %y, iXLen %vl) strictfp {
+; CHECK-LABEL: unfoldable_constrained_fadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfadd.vv v10, v10, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %a = call <vscale x 4 x float> @llvm.experimental.constrained.fadd(<vscale x 4 x float> %x, <vscale x 4 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
+  %b = call <vscale x 4 x float> @llvm.riscv.vmv.v.v.nxv4f32(<vscale x 4 x float> %passthru, <vscale x 4 x float> %a, iXLen %vl) strictfp
+  ret <vscale x 4 x float> %b
+}
+
+define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru, <vscale x 4 x i32> %x, <vscale x 2 x i32> %y) {
+; CHECK-LABEL: unfoldable_vredsum:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vredsum.vs v9, v10, v9
+; CHECK-NEXT:    vsetivli zero, 1, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv4i32(<vscale x 2 x i32> poison, <vscale x 4 x i32> %x, <vscale x 2 x i32> %y, iXLen -1)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, iXLen 1)
+  ret <vscale x 2 x i32> %b
+}
(diff for the PR's second test file; the file name and the start of this hunk were not captured in the extract)

+  %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> %passthru,
+    <vscale x 2 x i32> %x,
+    <vscale x 2 x i32> %y,
+    i64 -1)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 1)
+  ret <vscale x 2 x i32> %b
+}
 
 declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64)
 declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg)
 declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64)