You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[RISCV][VLOPT] Allow propagation even when VL isn't VLMAX
The original goal of this pass was to focus on vector operations with VLMAX.
However, users often utilize only part of the result, and such usage may come
from the vectorizer.
We found that relaxing this constraint can capture more optimization
opportunities, such as non-power-of-2 code generation and vector operation
sequences with different VLs.
---------
Co-authored-by: Kito Cheng <[email protected]>
@@ -23,7 +23,7 @@ define <vscale x 4 x i32> @different_imm_vl_with_ta(<vscale x 4 x i32> %passthru
23
23
ret <vscale x 4 x i32> %w
24
24
}
25
25
26
-
; No benificial to propagate VL since VL is larger in the use side.
26
+
; Not beneficial to propagate VL since VL is larger in the use side.
27
27
define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
@@ -38,20 +38,26 @@ define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32>
38
38
}
39
39
40
40
define <vscale x 4 x i32> @different_imm_reg_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
41
-
; CHECK-LABEL: different_imm_reg_vl_with_ta:
42
-
; CHECK: # %bb.0:
43
-
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
44
-
; CHECK-NEXT: vadd.vv v8, v10, v12
45
-
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
46
-
; CHECK-NEXT: vadd.vv v8, v8, v10
47
-
; CHECK-NEXT: ret
41
+
; NOVLOPT-LABEL: different_imm_reg_vl_with_ta:
42
+
; NOVLOPT: # %bb.0:
43
+
; NOVLOPT-NEXT: vsetivli zero, 4, e32, m2, ta, ma
44
+
; NOVLOPT-NEXT: vadd.vv v8, v10, v12
45
+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
46
+
; NOVLOPT-NEXT: vadd.vv v8, v8, v10
47
+
; NOVLOPT-NEXT: ret
48
+
;
49
+
; VLOPT-LABEL: different_imm_reg_vl_with_ta:
50
+
; VLOPT: # %bb.0:
51
+
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
52
+
; VLOPT-NEXT: vadd.vv v8, v10, v12
53
+
; VLOPT-NEXT: vadd.vv v8, v8, v10
54
+
; VLOPT-NEXT: ret
48
55
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
49
56
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
50
57
ret <vscale x 4 x i32> %w
51
58
}
52
59
53
-
54
-
; No benificial to propagate VL since VL is already one.
60
+
; Not beneficial to propagate VL since VL is already one.
55
61
define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
56
62
; CHECK-LABEL: different_imm_vl_with_ta_1:
57
63
; CHECK: # %bb.0:
@@ -69,13 +75,20 @@ define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passth
69
75
; it's still safe even if %vl2 is larger than %vl1, because rest of the vector are
70
76
; undefined value.
71
77
define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
72
-
; CHECK-LABEL: different_vl_with_ta:
73
-
; CHECK: # %bb.0:
74
-
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
75
-
; CHECK-NEXT: vadd.vv v10, v8, v10
76
-
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
77
-
; CHECK-NEXT: vadd.vv v8, v10, v8
78
-
; CHECK-NEXT: ret
78
+
; NOVLOPT-LABEL: different_vl_with_ta:
79
+
; NOVLOPT: # %bb.0:
80
+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
81
+
; NOVLOPT-NEXT: vadd.vv v10, v8, v10
82
+
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
83
+
; NOVLOPT-NEXT: vadd.vv v8, v10, v8
84
+
; NOVLOPT-NEXT: ret
85
+
;
86
+
; VLOPT-LABEL: different_vl_with_ta:
87
+
; VLOPT: # %bb.0:
88
+
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
89
+
; VLOPT-NEXT: vadd.vv v10, v8, v10
90
+
; VLOPT-NEXT: vadd.vv v8, v10, v8
91
+
; VLOPT-NEXT: ret
79
92
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
80
93
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen %vl2)
81
94
ret <vscale x 4 x i32> %w
@@ -110,7 +123,3 @@ define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru
110
123
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
111
124
ret <vscale x 4 x i32> %w
112
125
}
113
-
114
-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
0 commit comments