@@ -10,12 +10,9 @@ define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-LABEL: sabd_b:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vwsub.vv v12, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v12, 0
-; CHECK-NEXT: vmax.vv v12, v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vmin.vv v12, v8, v10
+; CHECK-NEXT: vmax.vv v8, v8, v10
+; CHECK-NEXT: vsub.vv v8, v8, v12
 ; CHECK-NEXT: ret
 %a.sext = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
 %b.sext = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
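Note: the new CHECK lines encode the identity abd(a, b) = max(a, b) - min(a, b). Computing min and max at the original element width and subtracting replaces the old widening subtract (vwsub.vv), negate (vrsub.vi), vmax.vv, and narrowing shift (vnsrl.wi), and drops the extra vsetvli toggles. A minimal scalar sketch in C++ of the same identity (the function names here are illustrative, not part of the patch):

    #include <algorithm>
    #include <cstdint>

    // Signed absolute difference at element width: mirrors the
    // vmin.vv / vmax.vv / vsub.vv sequence in the signed tests.
    uint8_t sabd(int8_t a, int8_t b) {
      return static_cast<uint8_t>(std::max(a, b) - std::min(a, b));
    }

    // Unsigned variant: mirrors vminu.vv / vmaxu.vv / vsub.vv.
    uint8_t uabd(uint8_t a, uint8_t b) {
      return static_cast<uint8_t>(std::max(a, b) - std::min(a, b));
    }

The subtraction cannot underflow because max(a, b) >= min(a, b) by construction, and the true difference always fits in the unsigned range of the element type.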
@@ -33,9 +30,9 @@ define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
 ; CHECK-NEXT: vmerge.vim v12, v10, -1, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vmerge.vim v8, v10, -1, v0
-; CHECK-NEXT: vsub.vv v8, v12, v8
-; CHECK-NEXT: vrsub.vi v10, v8, 0
-; CHECK-NEXT: vmax.vv v8, v8, v10
+; CHECK-NEXT: vmin.vv v10, v12, v8
+; CHECK-NEXT: vmax.vv v8, v12, v8
+; CHECK-NEXT: vsub.vv v8, v8, v10
 ; CHECK-NEXT: ret
 %a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
 %b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@@ -48,12 +45,9 @@ define <vscale x 8 x i16> @sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-LABEL: sabd_h:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vwsub.vv v12, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v12, 0
-; CHECK-NEXT: vmax.vv v12, v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vmin.vv v12, v8, v10
+; CHECK-NEXT: vmax.vv v8, v8, v10
+; CHECK-NEXT: vsub.vv v8, v8, v12
 ; CHECK-NEXT: ret
 %a.sext = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
 %b.sext = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
@@ -67,10 +61,11 @@ define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
 ; CHECK-LABEL: sabd_h_promoted_ops:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vwsub.vv v10, v8, v9
+; CHECK-NEXT: vmin.vv v10, v8, v9
+; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: vsub.vv v10, v8, v10
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 0
-; CHECK-NEXT: vmax.vv v8, v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
 %a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
 %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
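Note: in the promoted-operand tests the absolute difference of two narrow inputs always fits in the unsigned range of the narrow type (for i8: max - min <= 255), so the result can be computed at the narrow width and then widened with a single zero-extension (vzext.vf2) rather than sign-extended or computed at the wide width. A hedged scalar sketch (illustrative names again):

    #include <algorithm>
    #include <cstdint>

    // abd of two i8 values lies in [0, 255], so it fits in u8 and the
    // widening to i16 is a zero-extension, matching vzext.vf2.
    uint16_t sabd_promoted(int8_t a, int8_t b) {
      uint8_t d = static_cast<uint8_t>(std::max(a, b) - std::min(a, b));
      return static_cast<uint16_t>(d); // zero-extend the narrow result
    }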
@@ -83,12 +78,9 @@ define <vscale x 4 x i32> @sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-LABEL: sabd_s:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwsub.vv v12, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v12, 0
-; CHECK-NEXT: vmax.vv v12, v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vmin.vv v12, v8, v10
+; CHECK-NEXT: vmax.vv v8, v8, v10
+; CHECK-NEXT: vsub.vv v8, v8, v12
 ; CHECK-NEXT: ret
 %a.sext = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
 %b.sext = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
@@ -102,10 +94,11 @@ define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
 ; CHECK-LABEL: sabd_s_promoted_ops:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vwsub.vv v10, v8, v9
+; CHECK-NEXT: vmin.vv v10, v8, v9
+; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: vsub.vv v10, v8, v10
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 0
-; CHECK-NEXT: vmax.vv v8, v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
 %a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
 %b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -128,10 +121,11 @@ define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
 ; CHECK-LABEL: sabd_d_promoted_ops:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwsub.vv v10, v8, v9
+; CHECK-NEXT: vmin.vv v10, v8, v9
+; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: vsub.vv v10, v8, v10
 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 0
-; CHECK-NEXT: vmax.vv v8, v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
 %a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
 %b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -148,12 +142,9 @@ define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-LABEL: uabd_b:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vwsubu.vv v12, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v12, 0
-; CHECK-NEXT: vmax.vv v12, v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vminu.vv v12, v8, v10
+; CHECK-NEXT: vmaxu.vv v8, v8, v10
+; CHECK-NEXT: vsub.vv v8, v8, v12
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
 %b.zext = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
@@ -171,9 +162,9 @@ define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x
 ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
-; CHECK-NEXT: vsub.vv v8, v12, v8
-; CHECK-NEXT: vrsub.vi v10, v8, 0
-; CHECK-NEXT: vmax.vv v8, v8, v10
+; CHECK-NEXT: vminu.vv v10, v12, v8
+; CHECK-NEXT: vmaxu.vv v8, v12, v8
+; CHECK-NEXT: vsub.vv v8, v8, v10
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
 %b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
@@ -186,12 +177,9 @@ define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-LABEL: uabd_h:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vwsubu.vv v12, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v12, 0
-; CHECK-NEXT: vmax.vv v12, v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vminu.vv v12, v8, v10
+; CHECK-NEXT: vmaxu.vv v8, v8, v10
+; CHECK-NEXT: vsub.vv v8, v8, v12
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
 %b.zext = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
@@ -205,10 +193,11 @@ define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8
 ; CHECK-LABEL: uabd_h_promoted_ops:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vwsubu.vv v10, v8, v9
+; CHECK-NEXT: vminu.vv v10, v8, v9
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: vsub.vv v10, v8, v10
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 0
-; CHECK-NEXT: vmax.vv v8, v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
 %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
@@ -221,12 +210,9 @@ define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-LABEL: uabd_s:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT: vwsubu.vv v12, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v12, 0
-; CHECK-NEXT: vmax.vv v12, v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vminu.vv v12, v8, v10
+; CHECK-NEXT: vmaxu.vv v8, v8, v10
+; CHECK-NEXT: vsub.vv v8, v8, v12
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
 %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
@@ -240,10 +226,11 @@ define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x
 ; CHECK-LABEL: uabd_s_promoted_ops:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vwsubu.vv v10, v8, v9
+; CHECK-NEXT: vminu.vv v10, v8, v9
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: vsub.vv v10, v8, v10
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 0
-; CHECK-NEXT: vmax.vv v8, v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
 %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
@@ -266,10 +253,11 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
 ; CHECK-LABEL: uabd_d_promoted_ops:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vwsubu.vv v10, v8, v9
+; CHECK-NEXT: vminu.vv v10, v8, v9
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: vsub.vv v10, v8, v10
 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v10, 0
-; CHECK-NEXT: vmax.vv v8, v10, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
 %b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
@@ -285,12 +273,9 @@ define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <v
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT: vzext.vf4 v12, v10
-; CHECK-NEXT: vwsubu.vv v16, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v16, 0
-; CHECK-NEXT: vmax.vv v12, v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vminu.vv v10, v8, v12
+; CHECK-NEXT: vmaxu.vv v8, v8, v12
+; CHECK-NEXT: vsub.vv v8, v8, v10
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
 %b.zext = zext <vscale x 4 x i8> %b to <vscale x 4 x i64>
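Note: when the two extensions do not match, the narrower operand is first widened to the common source width (vzext.vf4 takes the i8 operand to i32 here), after which the same minu/maxu/sub sequence applies at that width. A scalar sketch under the same illustrative naming:

    #include <algorithm>
    #include <cstdint>

    // Mixed-width unsigned abd: zero-extend the narrow operand to the
    // common width first, then take minu/maxu/sub at that width.
    uint32_t uabd_mixed(uint32_t a, uint8_t b) {
      uint32_t bw = b; // implicit zero-extension, like vzext.vf4
      return std::max(a, bw) - std::min(a, bw);
    }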
@@ -307,10 +292,11 @@ define <vscale x 4 x i32> @uabd_non_matching_promoted_ops(<vscale x 4 x i8> %a,
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vwsubu.vv v12, v10, v9
+; CHECK-NEXT: vminu.vv v8, v10, v9
+; CHECK-NEXT: vmaxu.vv v9, v10, v9
+; CHECK-NEXT: vsub.vv v10, v9, v8
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vrsub.vi v8, v12, 0
-; CHECK-NEXT: vmax.vv v8, v12, v8
+; CHECK-NEXT: vzext.vf2 v8, v10
 ; CHECK-NEXT: ret
 %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
 %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>