You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[InstCombine] Implement vp.reverse reordering/elimination through binop/unop (#143963)
This simply copies the structure of the vector.reverse patterns from
just above, and reimplements them for the vp.reverse intrinsics when the
mask is all ones and the EVLs exactly match.
It's unfortunate that we have three different ways to represent a reverse
(shuffle, vector.reverse, and vp.reverse), but I don't see an obvious way
to remove any of them because the semantics are slightly different.
This significantly improves vectorization in TSVC_2's s112 and s1112
loops when using EVL tail folding.
Copy file name to clipboard Expand all lines: llvm/test/Transforms/InstCombine/vp-reverse.ll
+45 -23 Lines changed: 45 additions & 23 deletions
Original file line number
Diff line number
Diff line change
@@ -3,11 +3,8 @@
3
3
4
4
define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
5
5
; CHECK-LABEL: @binop_reverse_elim(
6
-
; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
7
-
; CHECK-NEXT: [[B:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
8
-
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], [[B]]
9
-
; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
10
-
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
6
+
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
7
+
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
11
8
;
12
9
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
13
10
%b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -16,8 +13,10 @@ define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4
16
13
ret <vscale x 4 x i32> %add.rev
17
14
}
18
15
19
-
define <vscale x 4 x i32> @binop_reverse_elim2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
20
-
; CHECK-LABEL: @binop_reverse_elim2(
16
+
; Negative test - the mask needs to be reversed between the inner and
17
+
; the outer to be correct.
18
+
define <vscale x 4 x i32> @binop_reverse_elim_samemask(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
19
+
; CHECK-LABEL: @binop_reverse_elim_samemask(
21
20
; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> [[M:%.*]], i32 [[EVL:%.*]])
22
21
; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
23
22
; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
@@ -48,10 +47,9 @@ define <vscale x 4 x i32> @binop_reverse_elim_diffmask(<vscale x 4 x i32> %a, <v
48
47
49
48
define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
50
49
; CHECK-LABEL: @binop_reverse_elim_diffevl(
51
-
; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
52
-
; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
53
-
; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
54
-
; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> splat (i1 true), i32 10)
50
+
; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV:%.*]], [[B_REV:%.*]]
51
+
; CHECK-NEXT: [[ADD1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
52
+
; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 10)
55
53
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
56
54
;
57
55
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -63,10 +61,8 @@ define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vs
63
61
64
62
define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %evl) {
65
63
; CHECK-LABEL: @binop_reverse_splat_elim(
66
-
; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
67
-
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
68
-
; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
69
-
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
64
+
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
65
+
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
70
66
;
71
67
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
72
68
%add = add nsw <vscale x 4 x i32> %a.rev, splat (i32 22)
@@ -76,23 +72,49 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %
76
72
77
73
define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32 %evl) {
78
74
; CHECK-LABEL: @binop_reverse_splat_elim2(
79
-
; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
80
-
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
81
-
; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
82
-
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
75
+
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
76
+
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
83
77
;
84
78
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
85
79
%add = add nsw <vscale x 4 x i32> splat (i32 22), %a.rev
86
80
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
87
81
ret <vscale x 4 x i32> %add.rev
88
82
}
89
83
84
+
define <vscale x 4 x i32> @binop_reverse_splat_elim3(<vscale x 4 x i32> %a, i32 %b, i32 %evl) {
85
+
; CHECK-LABEL: @binop_reverse_splat_elim3(
86
+
; CHECK-NEXT: [[B_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
87
+
; CHECK-NEXT: [[B_VEC:%.*]] = shufflevector <vscale x 4 x i32> [[B_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
88
+
; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[B_VEC]], [[A_REV:%.*]]
89
+
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
90
+
;
91
+
%b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
92
+
%b.vec = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
93
+
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
94
+
%add = add nsw <vscale x 4 x i32> %b.vec, %a.rev
95
+
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
96
+
ret <vscale x 4 x i32> %add.rev
97
+
}
98
+
99
+
define <vscale x 4 x i32> @binop_reverse_splat_elim4(<vscale x 4 x i32> %a, i32 %b, i32 %evl) {
100
+
; CHECK-LABEL: @binop_reverse_splat_elim4(
101
+
; CHECK-NEXT: [[B_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
102
+
; CHECK-NEXT: [[B_VEC:%.*]] = shufflevector <vscale x 4 x i32> [[B_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
103
+
; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B_VEC]]
104
+
; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
105
+
;
106
+
%b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
107
+
%b.vec = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
108
+
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
109
+
%add = add nsw <vscale x 4 x i32> %a.rev, %b.vec
110
+
%add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
111
+
ret <vscale x 4 x i32> %add.rev
112
+
}
113
+
90
114
define <vscale x 4 x float> @unop_reverse_splat_elim(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 %evl) {
91
115
; CHECK-LABEL: @unop_reverse_splat_elim(
92
-
; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
93
-
; CHECK-NEXT: [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV]]
94
-
; CHECK-NEXT: [[OP_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[OP]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
95
-
; CHECK-NEXT: ret <vscale x 4 x float> [[OP_REV]]
116
+
; CHECK-NEXT: [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV:%.*]]
117
+
; CHECK-NEXT: ret <vscale x 4 x float> [[OP]]
96
118
;
97
119
%a.rev = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
0 commit comments