; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s

; The LIT tests rely on i32, i16 and i8 being valid machine types.
; The bounds checking tests require also i64 and i128.
target datalayout = "n8:16:32:64:128"
; This LIT test checks if TruncInstCombine pass correctly recognizes the
8
9
; constraints from a signed min-max clamp. The clamp is a sequence of smin and
@@ -12,6 +13,11 @@ target datalayout = "n8:16:32"
; of smin and smax:
;   a) y = smax(smin(x, upper_limit), lower_limit)
;   b) y = smin(smax(x, lower_limit), upper_limit)
;
; The clamp is used in TruncInstCombine.cpp pass (as part of aggressive-instcombine)
; to optimize extensions and truncations of lshr. This is what is tested here.
; The pass also optimizes extensions and truncations of other binary operators,
; but in such cases the smin-smax clamp may not be used.

define i8 @test_0a (i16 %x ) {
17
23
; CHECK-LABEL: define i8 @test_0a(
@@ -47,6 +53,8 @@ define i8 @test_0b(i16 %x) {
47
53
ret i8 %b.trunc
48
54
}
49
55
56
; The following two tests contain add instead of lshr.
; The optimization works here as well.
50
58
define i8 @test_1a (i16 %x ) {
51
59
; CHECK-LABEL: define i8 @test_1a(
52
60
; CHECK-SAME: i16 [[X:%.*]]) {
@@ -81,19 +89,23 @@ define i8 @test_1b(i16 %x) {
81
89
ret i8 %b.trunc
82
90
}
83
91
92
; Tests for clamping with negative min and max.

; With sext no optimization occurs.
84
95
define i8 @test_2a(i16 %x) {
; CHECK-LABEL: define i8 @test_2a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT:    [[A:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[A]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
@@ -103,31 +115,69 @@ define i8 @test_2b(i16 %x) {
103
115
; CHECK-SAME: i16 [[X:%.*]]) {
104
116
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
105
117
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
106
- ; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
107
- ; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
108
- ; CHECK-NEXT: ret i8 [[B]]
118
+ ; CHECK-NEXT: [[A:%.*]] = sext i16 [[TMP2]] to i32
119
+ ; CHECK-NEXT: [[B:%.*]] = lshr i32 [[A]], 2
120
+ ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
121
+ ; CHECK-NEXT: ret i8 [[B_TRUNC]]
109
122
;
110
123
%1 = tail call i16 @llvm.smax.i16 (i16 %x , i16 -31 )
111
124
%2 = tail call i16 @llvm.smin.i16 (i16 %1 , i16 -1 )
112
125
%a = sext i16 %2 to i32
113
- %b = add i32 %a , 2
126
+ %b = lshr i32 %a , 2
127
+ %b.trunc = trunc i32 %b to i8
128
+ ret i8 %b.trunc
129
+ }
130
+
131
; With zext the optimization occurs.
132
define i8 @test_2c(i16 %x) {
; CHECK-LABEL: define i8 @test_2c(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
117
148
149
define i8 @test_2d(i16 %x) {
; CHECK-LABEL: define i8 @test_2d(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
  %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 -1)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
165
+
166
; Tests for clamping with mixed-signed min and max.
; With zext the optimization occurs.
118
168
define i8 @test_3a(i16 %x) {
; CHECK-LABEL: define i8 @test_3a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
@@ -137,31 +187,32 @@ define i8 @test_3b(i16 %x) {
137
187
; CHECK-SAME: i16 [[X:%.*]]) {
138
188
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
139
189
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
140
- ; CHECK-NEXT: [[A :%.*]] = trunc i16 [[TMP2]] to i8
141
- ; CHECK-NEXT: [[B :%.*]] = add i8 [[A]], 2
142
- ; CHECK-NEXT: ret i8 [[B ]]
190
+ ; CHECK-NEXT: [[B :%.*]] = lshr i16 [[TMP2]], 2
191
+ ; CHECK-NEXT: [[B_TRUNC :%.*]] = trunc i16 [[B]] to i8
192
+ ; CHECK-NEXT: ret i8 [[B_TRUNC ]]
143
193
;
144
194
%1 = tail call i16 @llvm.smax.i16 (i16 %x , i16 -31 )
145
195
%2 = tail call i16 @llvm.smin.i16 (i16 %1 , i16 31 )
146
- %a = sext i16 %2 to i32
147
- %b = add i32 %a , 2
196
+ %a = zext i16 %2 to i32
197
+ %b = lshr i32 %a , 2
148
198
%b.trunc = trunc i32 %b to i8
149
199
ret i8 %b.trunc
150
200
}
151
201
202
; Optimizations with vector types.
152
203
define <16 x i8> @test_vec_1a(<16 x i16> %x) {
; CHECK-LABEL: define <16 x i8> @test_vec_1a(
; CHECK-SAME: <16 x i16> [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[X]], <16 x i16> splat (i16 127))
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[TMP1]], <16 x i16> zeroinitializer)
; CHECK-NEXT:    [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
; CHECK-NEXT:    [[B:%.*]] = lshr <16 x i8> [[A]], splat (i8 2)
; CHECK-NEXT:    ret <16 x i8> [[B]]
;
  %1 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %x, <16 x i16> splat (i16 127))
  %2 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %1, <16 x i16> zeroinitializer)
  %a = sext <16 x i16> %2 to <16 x i32>
  %b = lshr <16 x i32> %a, splat (i32 2)
  %b.trunc = trunc <16 x i32> %b to <16 x i8>
  ret <16 x i8> %b.trunc
}
@@ -172,13 +223,13 @@ define <16 x i8> @test_vec_1b(<16 x i16> %x) {
172
223
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[X]], <16 x i16> zeroinitializer)
173
224
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[TMP1]], <16 x i16> splat (i16 127))
174
225
; CHECK-NEXT: [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
175
- ; CHECK-NEXT: [[B:%.*]] = add <16 x i8> [[A]], splat (i8 2)
226
+ ; CHECK-NEXT: [[B:%.*]] = lshr <16 x i8> [[A]], splat (i8 2)
176
227
; CHECK-NEXT: ret <16 x i8> [[B]]
177
228
;
178
229
%1 = tail call <16 x i16 > @llvm.smax.v16i16 (<16 x i16 > %x , <16 x i16 > zeroinitializer )
179
230
%2 = tail call <16 x i16 > @llvm.smin.v16i16 (<16 x i16 > %1 , <16 x i16 > splat (i16 127 ))
180
231
%a = sext <16 x i16 > %2 to <16 x i32 >
181
- %b = add <16 x i32 > %a , splat (i32 2 )
232
+ %b = lshr <16 x i32 > %a , splat (i32 2 )
182
233
%b.trunc = trunc <16 x i32 > %b to <16 x i8 >
183
234
ret <16 x i8 > %b.trunc
184
235
}
@@ -217,14 +268,14 @@ define i8 @test_bounds_1(i16 %x) {
217
268
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 127)
218
269
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
219
270
; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
220
- ; CHECK-NEXT: [[SHR :%.*]] = ashr i8 [[A]], 7
221
- ; CHECK-NEXT: ret i8 [[SHR ]]
271
+ ; CHECK-NEXT: [[B :%.*]] = lshr i8 [[A]], 7
272
+ ; CHECK-NEXT: ret i8 [[B ]]
222
273
;
223
274
%1 = tail call i16 @llvm.smin.i16 (i16 %x , i16 127 )
224
275
%2 = tail call i16 @llvm.smax.i16 (i16 %1 , i16 0 )
225
276
%a = sext i16 %2 to i32
226
- %shr = ashr i32 %a , 7
227
- %b.trunc = trunc i32 %shr to i8
277
+ %b = lshr i32 %a , 7
278
+ %b.trunc = trunc i32 %b to i8
228
279
ret i8 %b.trunc
229
280
}
230
281
@@ -234,15 +285,15 @@ define i8 @test_bounds_2(i16 %x) {
234
285
; CHECK-SAME: i16 [[X:%.*]]) {
235
286
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 128)
236
287
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
237
- ; CHECK-NEXT: [[SHR :%.*]] = ashr i16 [[TMP2]], 7
238
- ; CHECK-NEXT: [[B_TRUNC :%.*]] = trunc i16 [[SHR]] to i8
239
- ; CHECK-NEXT: ret i8 [[B_TRUNC ]]
288
+ ; CHECK-NEXT: [[A :%.*]] = trunc i16 [[TMP2]] to i8
289
+ ; CHECK-NEXT: [[B :%.*]] = lshr i8 [[A]], 7
290
+ ; CHECK-NEXT: ret i8 [[B ]]
240
291
;
241
292
%1 = tail call i16 @llvm.smin.i16 (i16 %x , i16 128 )
242
293
%2 = tail call i16 @llvm.smax.i16 (i16 %1 , i16 0 )
243
294
%a = sext i16 %2 to i32
244
- %shr = ashr i32 %a , 7
245
- %b.trunc = trunc i32 %shr to i8
295
+ %b = lshr i32 %a , 7
296
+ %b.trunc = trunc i32 %b to i8
246
297
ret i8 %b.trunc
247
298
}
248
299
@@ -253,14 +304,85 @@ define i8 @test_bounds_3(i16 %x) {
253
304
; CHECK-SAME: i16 [[X:%.*]]) {
254
305
; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 32767)
255
306
; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 32752)
256
- ; CHECK-NEXT: [[A :%.*]] = trunc i16 [[TMP2]] to i8
257
- ; CHECK-NEXT: [[AND :%.*]] = and i8 [[A]], -1
258
- ; CHECK-NEXT: ret i8 [[AND ]]
307
+ ; CHECK-NEXT: [[B :%.*]] = lshr i16 [[TMP2]], 2
308
+ ; CHECK-NEXT: [[B_TRUNC :%.*]] = trunc i16 [[B]] to i8
309
+ ; CHECK-NEXT: ret i8 [[B_TRUNC ]]
259
310
;
260
311
%1 = tail call i16 @llvm.smin.i16 (i16 %x , i16 32767 )
261
312
%2 = tail call i16 @llvm.smax.i16 (i16 %1 , i16 32752 )
262
313
%a = sext i16 %2 to i32
263
- %and = and i32 %a , 255
264
- %b.trunc = trunc i32 %and to i8
314
+ %b = lshr i32 %a , 2
315
+ %b.trunc = trunc i32 %b to i8
316
+ ret i8 %b.trunc
317
+ }
318
+
319
; Here min = 128 is greater than max = 0.
320
define i8 @test_bounds_4(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_4(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 0)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 128)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 0)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 128)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
336
+
337
; The following 3 tests check the situation where min and max are minimal and
; maximal signed values. No transformations should occur here.
339
define i8 @test_bounds_5(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_5(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 32767)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -32768)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 32767)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -32768)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
355
+
356
define i8 @test_bounds_6(i32 %x) {
; CHECK-LABEL: define i8 @test_bounds_6(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[X]], i32 2147483647)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP1]], i32 -2147483648)
; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i32 @llvm.smin.i32(i32 %x, i32 2147483647)
  %2 = tail call i32 @llvm.smax.i32(i32 %1, i32 -2147483648)
  %a = zext i32 %2 to i64
  %b = lshr i64 %a, 2
  %b.trunc = trunc i64 %b to i8
  ret i8 %b.trunc
}
372
+
373
define i8 @test_bounds_7(i64 %x) {
; CHECK-LABEL: define i8 @test_bounds_7(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.smin.i64(i64 [[X]], i64 9223372036854775807)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 -9223372036854775808)
; CHECK-NEXT:    [[B:%.*]] = lshr i64 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i64 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i64 @llvm.smin.i64(i64 %x, i64 9223372036854775807)
  %2 = tail call i64 @llvm.smax.i64(i64 %1, i64 -9223372036854775808)
  %a = zext i64 %2 to i128
  %b = lshr i128 %a, 2
  %b.trunc = trunc i128 %b to i8
  ret i8 %b.trunc
}
0 commit comments