Skip to content

Commit f6a4820

Browse files
committed
[TypePromotion] Support positive addition amounts in isSafeWrap.
We can support these by changing the sext promotion to -zext(-C) and replacing a sgt check with ugt. This reframes the logic in terms of how the unsigned range is affected. More comments in the patch.
1 parent 017675f commit f6a4820

11 files changed

+470
-307
lines changed

llvm/lib/CodeGen/TypePromotion.cpp

Lines changed: 52 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -272,64 +272,58 @@ bool TypePromotionImpl::isSink(Value *V) {
272272

273273
/// Return whether this instruction can safely wrap.
274274
bool TypePromotionImpl::isSafeWrap(Instruction *I) {
275-
// We can support a potentially wrapping instruction (I) if:
275+
// We can support a potentially wrapping Add/Sub instruction (I) if:
276276
// - It is only used by an unsigned icmp.
277277
// - The icmp uses a constant.
278-
// - The wrapping value (I) is decreasing, i.e would underflow - wrapping
279-
// around zero to become a larger number than before.
280278
// - The wrapping instruction (I) also uses a constant.
281279
//
282-
// We can then use the two constants to calculate whether the result would
283-
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
284-
// just underflows the range, the icmp would give the same result whether the
285-
// result has been truncated or not. We calculate this by:
286-
// - Zero extending both constants, if needed, to RegisterBitWidth.
287-
// - Take the absolute value of I's constant, adding this to the icmp const.
288-
// - Check that this value is not out of range for small type. If it is, it
289-
// means that it has underflowed enough to wrap around the icmp constant.
280+
// This is a common pattern emitted to check if a value is within a range.
290281
//
291282
// For example:
292283
//
293-
// %sub = sub i8 %a, 2
294-
// %cmp = icmp ule i8 %sub, 254
284+
// %sub = sub i8 %a, C1
285+
// %cmp = icmp ule i8 %sub, C2
286+
//
287+
// or
295288
//
296-
// If %a = 0, %sub = -2 == FE == 254
297-
// But if this is evalulated as a i32
298-
// %sub = -2 == FF FF FF FE == 4294967294
299-
// So the unsigned compares (i8 and i32) would not yield the same result.
289+
// %add = add i8 %a, C1
290+
// %cmp = icmp ule i8 %add, C2.
300291
//
301-
// Another way to look at it is:
302-
// %a - 2 <= 254
303-
// %a + 2 <= 254 + 2
304-
// %a <= 256
305-
// And we can't represent 256 in the i8 format, so we don't support it.
292+
// We will treat an add as though it were a subtract by -C1. To promote
293+
// the Add/Sub we will zero extend the LHS and the subtracted amount. For Add,
294+
// this means we need to negate the constant, zero extend to RegisterBitWidth,
295+
// and negate in the larger type.
306296
//
307-
// Whereas:
297+
// This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where
298+
// C1 is the subtracted amount. This is either a small unsigned number or a
299+
// large unsigned number in the promoted type.
308300
//
309-
// %sub i8 %a, 1
301+
// Now we need to correct the compare constant C2. Values >= C1 in the
302+
// original add result range have been remapped to large values in the
303+
// promoted range. If the compare constant fell into this range we need to
304+
// remap it as well. We can do this as -(zext(-C2)).
305+
//
306+
// For example:
307+
//
308+
// %sub = sub i8 %a, 2
310309
// %cmp = icmp ule i8 %sub, 254
311310
//
312-
// If %a = 0, %sub = -1 == FF == 255
313-
// As i32:
314-
// %sub = -1 == FF FF FF FF == 4294967295
311+
// becomes
315312
//
316-
// In this case, the unsigned compare results would be the same and this
317-
// would also be true for ult, uge and ugt:
318-
// - (255 < 254) == (0xFFFFFFFF < 254) == false
319-
// - (255 <= 254) == (0xFFFFFFFF <= 254) == false
320-
// - (255 > 254) == (0xFFFFFFFF > 254) == true
321-
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
313+
// %zext = zext %a to i32
314+
// %sub = sub i32 %zext, 2
315+
// %cmp = icmp ule i32 %sub, 4294967294
322316
//
323-
// To demonstrate why we can't handle increasing values:
317+
// Another example:
324318
//
325-
// %add = add i8 %a, 2
326-
// %cmp = icmp ult i8 %add, 127
319+
// %sub = sub i8 %a, 1
320+
// %cmp = icmp ule i8 %sub, 254
327321
//
328-
// If %a = 254, %add = 256 == (i8 1)
329-
// As i32:
330-
// %add = 256
322+
// becomes
331323
//
332-
// (1 < 127) != (256 < 127)
324+
// %zext = zext %a to i32
325+
// %sub = sub i32 %zext, 1
326+
// %cmp = icmp ule i32 %sub, 254
333327

334328
unsigned Opc = I->getOpcode();
335329
if (Opc != Instruction::Add && Opc != Instruction::Sub)
@@ -356,15 +350,10 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) {
356350
APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
357351
if (Opc == Instruction::Sub)
358352
OverflowConst = -OverflowConst;
359-
if (!OverflowConst.isNonPositive())
360-
return false;
361353

362354
SafeWrap.insert(I);
363355

364-
// Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
365-
// zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
366-
// zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
367-
if (OverflowConst.sgt(ICmpConst)) {
356+
if (OverflowConst.ugt(ICmpConst)) {
368357
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
369358
<< "const of " << *I << "\n");
370359
return true;
@@ -487,18 +476,24 @@ void IRPromoter::PromoteTree() {
487476
continue;
488477

489478
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
490-
// For subtract, we don't need to sext the constant. We only put it in
479+
// For subtract, we only need to zext the constant. We only put it in
491480
// SafeWrap because SafeWrap.size() is used elsewhere.
492-
// For cmp, we need to sign extend a constant appearing in either
493-
// operand. For add, we should only sign extend the RHS.
494-
Constant *NewConst =
495-
ConstantInt::get(Const->getContext(),
496-
(SafeWrap.contains(I) &&
497-
(I->getOpcode() == Instruction::ICmp || i == 1) &&
498-
I->getOpcode() != Instruction::Sub)
499-
? Const->getValue().sext(PromotedWidth)
500-
: Const->getValue().zext(PromotedWidth));
501-
I->setOperand(i, NewConst);
481+
// For Add and ICmp we need to find how far the constant is from the
482+
// top of its original unsigned range and place it the same distance
483+
// from the top of its new unsigned range. We can do this by negating
484+
// the constant, zero extending it, then negating in the new type.
485+
APInt NewConst;
486+
if (SafeWrap.contains(I)) {
487+
if (I->getOpcode() == Instruction::ICmp)
488+
NewConst = -((-Const->getValue()).zext(PromotedWidth));
489+
else if (I->getOpcode() == Instruction::Add && i == 1)
490+
NewConst = -((-Const->getValue()).zext(PromotedWidth));
491+
else
492+
NewConst = Const->getValue().zext(PromotedWidth);
493+
} else
494+
NewConst = Const->getValue().zext(PromotedWidth);
495+
496+
I->setOperand(i, ConstantInt::get(Const->getContext(), NewConst));
502497
} else if (isa<UndefValue>(Op))
503498
I->setOperand(i, ConstantInt::get(ExtTy, 0));
504499
}

llvm/test/CodeGen/AArch64/and-mask-removal.ll

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,8 @@ if.end: ; preds = %if.then, %entry
6565
define zeroext i1 @test8_0(i8 zeroext %x) align 2 {
6666
; CHECK-LABEL: test8_0:
6767
; CHECK: ; %bb.0: ; %entry
68-
; CHECK-NEXT: add w8, w0, #74
69-
; CHECK-NEXT: and w8, w8, #0xff
70-
; CHECK-NEXT: cmp w8, #236
68+
; CHECK-NEXT: sub w8, w0, #182
69+
; CHECK-NEXT: cmn w8, #20
7170
; CHECK-NEXT: cset w0, lo
7271
; CHECK-NEXT: ret
7372
entry:
@@ -295,20 +294,20 @@ ret_true:
295294
define zeroext i1 @test16_2(i16 zeroext %x) align 2 {
296295
; CHECK-SD-LABEL: test16_2:
297296
; CHECK-SD: ; %bb.0: ; %entry
298-
; CHECK-SD-NEXT: mov w8, #16882 ; =0x41f2
299-
; CHECK-SD-NEXT: mov w9, #40700 ; =0x9efc
297+
; CHECK-SD-NEXT: mov w8, #-48654 ; =0xffff41f2
298+
; CHECK-SD-NEXT: mov w9, #-24836 ; =0xffff9efc
300299
; CHECK-SD-NEXT: add w8, w0, w8
301-
; CHECK-SD-NEXT: cmp w9, w8, uxth
302-
; CHECK-SD-NEXT: cset w0, hi
300+
; CHECK-SD-NEXT: cmp w8, w9
301+
; CHECK-SD-NEXT: cset w0, lo
303302
; CHECK-SD-NEXT: ret
304303
;
305304
; CHECK-GI-LABEL: test16_2:
306305
; CHECK-GI: ; %bb.0: ; %entry
307-
; CHECK-GI-NEXT: mov w8, #16882 ; =0x41f2
308-
; CHECK-GI-NEXT: mov w9, #40699 ; =0x9efb
306+
; CHECK-GI-NEXT: mov w8, #-48654 ; =0xffff41f2
307+
; CHECK-GI-NEXT: mov w9, #-24837 ; =0xffff9efb
309308
; CHECK-GI-NEXT: add w8, w0, w8
310-
; CHECK-GI-NEXT: cmp w9, w8, uxth
311-
; CHECK-GI-NEXT: cset w0, hs
309+
; CHECK-GI-NEXT: cmp w8, w9
310+
; CHECK-GI-NEXT: cset w0, ls
312311
; CHECK-GI-NEXT: ret
313312
entry:
314313
%0 = add i16 %x, 16882
@@ -349,20 +348,20 @@ ret_true:
349348
define zeroext i1 @test16_4(i16 zeroext %x) align 2 {
350349
; CHECK-SD-LABEL: test16_4:
351350
; CHECK-SD: ; %bb.0: ; %entry
352-
; CHECK-SD-NEXT: mov w8, #29985 ; =0x7521
351+
; CHECK-SD-NEXT: mov w8, #-35551 ; =0xffff7521
353352
; CHECK-SD-NEXT: mov w9, #15676 ; =0x3d3c
354353
; CHECK-SD-NEXT: add w8, w0, w8
355-
; CHECK-SD-NEXT: cmp w9, w8, uxth
356-
; CHECK-SD-NEXT: cset w0, lo
354+
; CHECK-SD-NEXT: cmp w8, w9
355+
; CHECK-SD-NEXT: cset w0, hi
357356
; CHECK-SD-NEXT: ret
358357
;
359358
; CHECK-GI-LABEL: test16_4:
360359
; CHECK-GI: ; %bb.0: ; %entry
361-
; CHECK-GI-NEXT: mov w8, #29985 ; =0x7521
360+
; CHECK-GI-NEXT: mov w8, #-35551 ; =0xffff7521
362361
; CHECK-GI-NEXT: mov w9, #15677 ; =0x3d3d
363362
; CHECK-GI-NEXT: add w8, w0, w8
364-
; CHECK-GI-NEXT: cmp w9, w8, uxth
365-
; CHECK-GI-NEXT: cset w0, ls
363+
; CHECK-GI-NEXT: cmp w8, w9
364+
; CHECK-GI-NEXT: cset w0, hs
366365
; CHECK-GI-NEXT: ret
367366
entry:
368367
%0 = add i16 %x, -35551
@@ -431,20 +430,20 @@ ret_true:
431430
define zeroext i1 @test16_7(i16 zeroext %x) align 2 {
432431
; CHECK-SD-LABEL: test16_7:
433432
; CHECK-SD: ; %bb.0: ; %entry
434-
; CHECK-SD-NEXT: mov w8, #9272 ; =0x2438
435-
; CHECK-SD-NEXT: mov w9, #22619 ; =0x585b
433+
; CHECK-SD-NEXT: mov w8, #-56264 ; =0xffff2438
434+
; CHECK-SD-NEXT: mov w9, #-42917 ; =0xffff585b
436435
; CHECK-SD-NEXT: add w8, w0, w8
437-
; CHECK-SD-NEXT: cmp w9, w8, uxth
438-
; CHECK-SD-NEXT: cset w0, lo
436+
; CHECK-SD-NEXT: cmp w8, w9
437+
; CHECK-SD-NEXT: cset w0, hi
439438
; CHECK-SD-NEXT: ret
440439
;
441440
; CHECK-GI-LABEL: test16_7:
442441
; CHECK-GI: ; %bb.0: ; %entry
443-
; CHECK-GI-NEXT: mov w8, #9272 ; =0x2438
444-
; CHECK-GI-NEXT: mov w9, #22620 ; =0x585c
442+
; CHECK-GI-NEXT: mov w8, #-56264 ; =0xffff2438
443+
; CHECK-GI-NEXT: mov w9, #-42916 ; =0xffff585c
445444
; CHECK-GI-NEXT: add w8, w0, w8
446-
; CHECK-GI-NEXT: cmp w9, w8, uxth
447-
; CHECK-GI-NEXT: cset w0, ls
445+
; CHECK-GI-NEXT: cmp w8, w9
446+
; CHECK-GI-NEXT: cset w0, hs
448447
; CHECK-GI-NEXT: ret
449448
entry:
450449
%0 = add i16 %x, 9272
@@ -508,16 +507,17 @@ define i64 @pr58109(i8 signext %0) {
508507
define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) {
509508
; CHECK-SD-LABEL: pr58109b:
510509
; CHECK-SD: ; %bb.0:
511-
; CHECK-SD-NEXT: add w8, w0, #1
512-
; CHECK-SD-NEXT: tst w8, #0xfe
513-
; CHECK-SD-NEXT: csel x0, x1, x2, eq
510+
; CHECK-SD-NEXT: and w8, w0, #0xff
511+
; CHECK-SD-NEXT: sub w8, w8, #255
512+
; CHECK-SD-NEXT: cmn w8, #254
513+
; CHECK-SD-NEXT: csel x0, x1, x2, lo
514514
; CHECK-SD-NEXT: ret
515515
;
516516
; CHECK-GI-LABEL: pr58109b:
517517
; CHECK-GI: ; %bb.0:
518-
; CHECK-GI-NEXT: add w8, w0, #1
519-
; CHECK-GI-NEXT: and w8, w8, #0xff
520-
; CHECK-GI-NEXT: cmp w8, #2
518+
; CHECK-GI-NEXT: mov w8, #-255 ; =0xffffff01
519+
; CHECK-GI-NEXT: add w8, w8, w0, uxtb
520+
; CHECK-GI-NEXT: cmn w8, #254
521521
; CHECK-GI-NEXT: csel x0, x1, x2, lo
522522
; CHECK-GI-NEXT: ret
523523
%2 = add i8 %0, 1

llvm/test/CodeGen/AArch64/lack-of-signed-truncation-check.ll

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -187,10 +187,11 @@ define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
187187
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
188188
; CHECK-LABEL: add_ugecmp_i16_i8:
189189
; CHECK: // %bb.0:
190-
; CHECK-NEXT: sxtb w8, w0
191-
; CHECK-NEXT: and w8, w8, #0xffff
192-
; CHECK-NEXT: cmp w8, w0, uxth
193-
; CHECK-NEXT: cset w0, ne
190+
; CHECK-NEXT: mov w8, #-65408 // =0xffff0080
191+
; CHECK-NEXT: mov w9, #-65281 // =0xffff00ff
192+
; CHECK-NEXT: add w8, w8, w0, uxth
193+
; CHECK-NEXT: cmp w8, w9
194+
; CHECK-NEXT: cset w0, hi
194195
; CHECK-NEXT: ret
195196
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
196197
%tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
@@ -256,10 +257,11 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
256257
define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
257258
; CHECK-LABEL: add_ugtcmp_i16_i8:
258259
; CHECK: // %bb.0:
259-
; CHECK-NEXT: sxtb w8, w0
260-
; CHECK-NEXT: and w8, w8, #0xffff
261-
; CHECK-NEXT: cmp w8, w0, uxth
262-
; CHECK-NEXT: cset w0, ne
260+
; CHECK-NEXT: mov w8, #-65408 // =0xffff0080
261+
; CHECK-NEXT: mov w9, #-65281 // =0xffff00ff
262+
; CHECK-NEXT: add w8, w8, w0, uxth
263+
; CHECK-NEXT: cmp w8, w9
264+
; CHECK-NEXT: cset w0, hi
263265
; CHECK-NEXT: ret
264266
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
265267
%tmp1 = icmp ugt i16 %tmp0, 255 ; (1U << 8) - 1
@@ -301,9 +303,10 @@ define i1 @add_ugecmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
301303
define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
302304
; CHECK-LABEL: add_ugecmp_bad_i8_i16:
303305
; CHECK: // %bb.0:
304-
; CHECK-NEXT: add w8, w0, #128
305-
; CHECK-NEXT: and w8, w8, #0xffff
306-
; CHECK-NEXT: cmp w8, #127
306+
; CHECK-NEXT: mov w8, #-65408 // =0xffff0080
307+
; CHECK-NEXT: mov w9, #-65409 // =0xffff007f
308+
; CHECK-NEXT: add w8, w8, w0, uxth
309+
; CHECK-NEXT: cmp w8, w9
307310
; CHECK-NEXT: cset w0, hi
308311
; CHECK-NEXT: ret
309312
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
@@ -315,9 +318,10 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
315318
define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
316319
; CHECK-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo:
317320
; CHECK: // %bb.0:
318-
; CHECK-NEXT: add w8, w0, #192
319-
; CHECK-NEXT: and w8, w8, #0xffff
320-
; CHECK-NEXT: cmp w8, #255
321+
; CHECK-NEXT: mov w8, #-65344 // =0xffff00c0
322+
; CHECK-NEXT: mov w9, #-65281 // =0xffff00ff
323+
; CHECK-NEXT: add w8, w8, w0, uxth
324+
; CHECK-NEXT: cmp w8, w9
321325
; CHECK-NEXT: cset w0, hi
322326
; CHECK-NEXT: ret
323327
%tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
@@ -329,9 +333,10 @@ define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
329333
define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
330334
; CHECK-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo:
331335
; CHECK: // %bb.0:
332-
; CHECK-NEXT: add w8, w0, #128
333-
; CHECK-NEXT: and w8, w8, #0xffff
334-
; CHECK-NEXT: cmp w8, #767
336+
; CHECK-NEXT: mov w8, #-65408 // =0xffff0080
337+
; CHECK-NEXT: mov w9, #-64769 // =0xffff02ff
338+
; CHECK-NEXT: add w8, w8, w0, uxth
339+
; CHECK-NEXT: cmp w8, w9
335340
; CHECK-NEXT: cset w0, hi
336341
; CHECK-NEXT: ret
337342
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
@@ -343,9 +348,10 @@ define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
343348
define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind {
344349
; CHECK-LABEL: add_ugecmp_bad_i16_i8_magic:
345350
; CHECK: // %bb.0:
346-
; CHECK-NEXT: add w8, w0, #64
347-
; CHECK-NEXT: and w8, w8, #0xffff
348-
; CHECK-NEXT: cmp w8, #255
351+
; CHECK-NEXT: mov w8, #-65472 // =0xffff0040
352+
; CHECK-NEXT: mov w9, #-65281 // =0xffff00ff
353+
; CHECK-NEXT: add w8, w8, w0, uxth
354+
; CHECK-NEXT: cmp w8, w9
349355
; CHECK-NEXT: cset w0, hi
350356
; CHECK-NEXT: ret
351357
%tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
@@ -357,9 +363,10 @@ define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind {
357363
define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind {
358364
; CHECK-LABEL: add_ugecmp_bad_i16_i4:
359365
; CHECK: // %bb.0:
360-
; CHECK-NEXT: add w8, w0, #8
361-
; CHECK-NEXT: and w8, w8, #0xffff
362-
; CHECK-NEXT: cmp w8, #15
366+
; CHECK-NEXT: mov w8, #-65528 // =0xffff0008
367+
; CHECK-NEXT: mov w9, #-65521 // =0xffff000f
368+
; CHECK-NEXT: add w8, w8, w0, uxth
369+
; CHECK-NEXT: cmp w8, w9
363370
; CHECK-NEXT: cset w0, hi
364371
; CHECK-NEXT: ret
365372
%tmp0 = add i16 %x, 8 ; 1U << (4-1)
@@ -371,9 +378,12 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind {
371378
define i1 @add_ugecmp_bad_i24_i8(i24 %x) nounwind {
372379
; CHECK-LABEL: add_ugecmp_bad_i24_i8:
373380
; CHECK: // %bb.0:
374-
; CHECK-NEXT: add w8, w0, #128
375-
; CHECK-NEXT: and w8, w8, #0xffffff
376-
; CHECK-NEXT: cmp w8, #255
381+
; CHECK-NEXT: mov w8, #128 // =0x80
382+
; CHECK-NEXT: and w9, w0, #0xffffff
383+
; CHECK-NEXT: movk w8, #65280, lsl #16
384+
; CHECK-NEXT: add w8, w9, w8
385+
; CHECK-NEXT: mov w9, #-16776961 // =0xff0000ff
386+
; CHECK-NEXT: cmp w8, w9
377387
; CHECK-NEXT: cset w0, hi
378388
; CHECK-NEXT: ret
379389
%tmp0 = add i24 %x, 128 ; 1U << (8-1)

0 commit comments

Comments
 (0)