Skip to content

Commit 97f08c6

Browse files
committed
[TypePromotion] Support positive addition amounts in isSafeWrap.
We can support these by changing the sext promotion to -zext(-C) and replacing a sgt check with ugt. Reframing the logic in terms of how the unsigned range are affected. More comments in the patch.
1 parent 017675f commit 97f08c6

File tree

7 files changed

+91
-81
lines changed

7 files changed

+91
-81
lines changed

llvm/lib/CodeGen/TypePromotion.cpp

Lines changed: 67 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ class IRPromoter {
136136

137137
class TypePromotionImpl {
138138
unsigned TypeSize = 0;
139+
const TargetLowering *TLI = nullptr;
139140
LLVMContext *Ctx = nullptr;
140141
unsigned RegisterBitWidth = 0;
141142
SmallPtrSet<Value *, 16> AllVisited;
@@ -272,64 +273,58 @@ bool TypePromotionImpl::isSink(Value *V) {
272273

273274
/// Return whether this instruction can safely wrap.
274275
bool TypePromotionImpl::isSafeWrap(Instruction *I) {
275-
// We can support a potentially wrapping instruction (I) if:
276+
// We can support a potentially wrapping Add/Sub instruction (I) if:
276277
// - It is only used by an unsigned icmp.
277278
// - The icmp uses a constant.
278-
// - The wrapping value (I) is decreasing, i.e would underflow - wrapping
279-
// around zero to become a larger number than before.
280279
// - The wrapping instruction (I) also uses a constant.
281280
//
282-
// We can then use the two constants to calculate whether the result would
283-
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
284-
// just underflows the range, the icmp would give the same result whether the
285-
// result has been truncated or not. We calculate this by:
286-
// - Zero extending both constants, if needed, to RegisterBitWidth.
287-
// - Take the absolute value of I's constant, adding this to the icmp const.
288-
// - Check that this value is not out of range for small type. If it is, it
289-
// means that it has underflowed enough to wrap around the icmp constant.
281+
// This a common pattern emitted to check if a value is within a range.
290282
//
291283
// For example:
292284
//
293-
// %sub = sub i8 %a, 2
294-
// %cmp = icmp ule i8 %sub, 254
285+
// %sub = sub i8 %a, C1
286+
// %cmp = icmp ule i8 %sub, C2
287+
//
288+
// or
289+
//
290+
// %add = add i8 %a, C1
291+
// %cmp = icmp ule i8 %add, C2.
295292
//
296-
// If %a = 0, %sub = -2 == FE == 254
297-
// But if this is evalulated as a i32
298-
// %sub = -2 == FF FF FF FE == 4294967294
299-
// So the unsigned compares (i8 and i32) would not yield the same result.
293+
// We will treat an add as though it were a subtract by -C1. To promote
294+
// the Add/Sub we will zero extend the LHS and the subtracted amount. For Add,
295+
// this means we need to negate the constant, zero extend to RegisterBitWidth,
296+
// and negate in the larger type.
300297
//
301-
// Another way to look at it is:
302-
// %a - 2 <= 254
303-
// %a + 2 <= 254 + 2
304-
// %a <= 256
305-
// And we can't represent 256 in the i8 format, so we don't support it.
298+
// This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where
299+
// C1 is the subtracted amount. This is either a small unsigned number or a
300+
// large unsigned number in the promoted type.
306301
//
307-
// Whereas:
302+
// Now we need to correct the compare constant C2. Values >= C1 in the
303+
// original add result range have been remapped to large values in the
304+
// promoted range. If the compare constant fell into this range we need to
305+
// remap it as well. We can do this as -(zext(-C2)).
308306
//
309-
// %sub i8 %a, 1
307+
// For example:
308+
//
309+
// %sub = sub i8 %a, 2
310310
// %cmp = icmp ule i8 %sub, 254
311311
//
312-
// If %a = 0, %sub = -1 == FF == 255
313-
// As i32:
314-
// %sub = -1 == FF FF FF FF == 4294967295
312+
// becomes
315313
//
316-
// In this case, the unsigned compare results would be the same and this
317-
// would also be true for ult, uge and ugt:
318-
// - (255 < 254) == (0xFFFFFFFF < 254) == false
319-
// - (255 <= 254) == (0xFFFFFFFF <= 254) == false
320-
// - (255 > 254) == (0xFFFFFFFF > 254) == true
321-
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
314+
// %zext = zext %a to i32
315+
// %sub = sub i32 %zext, 2
316+
// %cmp = icmp ule i32 %sub, 4294967294
322317
//
323-
// To demonstrate why we can't handle increasing values:
318+
// Another example:
324319
//
325-
// %add = add i8 %a, 2
326-
// %cmp = icmp ult i8 %add, 127
320+
// %sub = sub i8 %a, 1
321+
// %cmp = icmp ule i8 %sub, 254
327322
//
328-
// If %a = 254, %add = 256 == (i8 1)
329-
// As i32:
330-
// %add = 256
323+
// becomes
331324
//
332-
// (1 < 127) != (256 < 127)
325+
// %zext = zext %a to i32
326+
// %sub = sub i32 %zext, 1
327+
// %cmp = icmp ule i32 %sub, 254
333328

334329
unsigned Opc = I->getOpcode();
335330
if (Opc != Instruction::Add && Opc != Instruction::Sub)
@@ -356,15 +351,23 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) {
356351
APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
357352
if (Opc == Instruction::Sub)
358353
OverflowConst = -OverflowConst;
359-
if (!OverflowConst.isNonPositive())
360-
return false;
354+
355+
// If the constant is positive, we will end up filling the promoted bits with
356+
// all 1s. Make sure that results in a cheap add constant.
357+
if (!OverflowConst.isNonPositive()) {
358+
// We don't have the true promoted width, just use 64 so we can create an
359+
// int64_t for the isLegalAddImmediate call.
360+
if (OverflowConst.getBitWidth() >= 64)
361+
return false;
362+
363+
APInt NewConst = -((-OverflowConst).zext(64));
364+
if (!TLI->isLegalAddImmediate(NewConst.getSExtValue()))
365+
return false;
366+
}
361367

362368
SafeWrap.insert(I);
363369

364-
// Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
365-
// zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
366-
// zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
367-
if (OverflowConst.sgt(ICmpConst)) {
370+
if (OverflowConst.ugt(ICmpConst)) {
368371
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
369372
<< "const of " << *I << "\n");
370373
return true;
@@ -487,18 +490,24 @@ void IRPromoter::PromoteTree() {
487490
continue;
488491

489492
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
490-
// For subtract, we don't need to sext the constant. We only put it in
493+
// For subtract, we only need to zext the constant. We only put it in
491494
// SafeWrap because SafeWrap.size() is used elsewhere.
492-
// For cmp, we need to sign extend a constant appearing in either
493-
// operand. For add, we should only sign extend the RHS.
494-
Constant *NewConst =
495-
ConstantInt::get(Const->getContext(),
496-
(SafeWrap.contains(I) &&
497-
(I->getOpcode() == Instruction::ICmp || i == 1) &&
498-
I->getOpcode() != Instruction::Sub)
499-
? Const->getValue().sext(PromotedWidth)
500-
: Const->getValue().zext(PromotedWidth));
501-
I->setOperand(i, NewConst);
495+
// For Add and ICmp we need to find how far the constant is from the
496+
// top of its original unsigned range and place it the same distance
497+
// from the top of its new unsigned range. We can do this by negating
498+
// the constant, zero extending it, then negating in the new type.
499+
APInt NewConst;
500+
if (SafeWrap.contains(I)) {
501+
if (I->getOpcode() == Instruction::ICmp)
502+
NewConst = -((-Const->getValue()).zext(PromotedWidth));
503+
else if (I->getOpcode() == Instruction::Add && i == 1)
504+
NewConst = -((-Const->getValue()).zext(PromotedWidth));
505+
else
506+
NewConst = Const->getValue().zext(PromotedWidth);
507+
} else
508+
NewConst = Const->getValue().zext(PromotedWidth);
509+
510+
I->setOperand(i, ConstantInt::get(Const->getContext(), NewConst));
502511
} else if (isa<UndefValue>(Op))
503512
I->setOperand(i, ConstantInt::get(ExtTy, 0));
504513
}
@@ -917,7 +926,7 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
917926
bool MadeChange = false;
918927
const DataLayout &DL = F.getParent()->getDataLayout();
919928
const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F);
920-
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
929+
TLI = SubtargetInfo->getTargetLowering();
921930
RegisterBitWidth =
922931
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue();
923932
Ctx = &F.getParent()->getContext();

llvm/test/CodeGen/AArch64/and-mask-removal.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,8 @@ if.end: ; preds = %if.then, %entry
6565
define zeroext i1 @test8_0(i8 zeroext %x) align 2 {
6666
; CHECK-LABEL: test8_0:
6767
; CHECK: ; %bb.0: ; %entry
68-
; CHECK-NEXT: add w8, w0, #74
69-
; CHECK-NEXT: and w8, w8, #0xff
70-
; CHECK-NEXT: cmp w8, #236
68+
; CHECK-NEXT: sub w8, w0, #182
69+
; CHECK-NEXT: cmn w8, #20
7170
; CHECK-NEXT: cset w0, lo
7271
; CHECK-NEXT: ret
7372
entry:
@@ -508,16 +507,17 @@ define i64 @pr58109(i8 signext %0) {
508507
define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) {
509508
; CHECK-SD-LABEL: pr58109b:
510509
; CHECK-SD: ; %bb.0:
511-
; CHECK-SD-NEXT: add w8, w0, #1
512-
; CHECK-SD-NEXT: tst w8, #0xfe
513-
; CHECK-SD-NEXT: csel x0, x1, x2, eq
510+
; CHECK-SD-NEXT: and w8, w0, #0xff
511+
; CHECK-SD-NEXT: sub w8, w8, #255
512+
; CHECK-SD-NEXT: cmn w8, #254
513+
; CHECK-SD-NEXT: csel x0, x1, x2, lo
514514
; CHECK-SD-NEXT: ret
515515
;
516516
; CHECK-GI-LABEL: pr58109b:
517517
; CHECK-GI: ; %bb.0:
518-
; CHECK-GI-NEXT: add w8, w0, #1
519-
; CHECK-GI-NEXT: and w8, w8, #0xff
520-
; CHECK-GI-NEXT: cmp w8, #2
518+
; CHECK-GI-NEXT: mov w8, #-255 ; =0xffffff01
519+
; CHECK-GI-NEXT: add w8, w8, w0, uxtb
520+
; CHECK-GI-NEXT: cmn w8, #254
521521
; CHECK-GI-NEXT: csel x0, x1, x2, lo
522522
; CHECK-GI-NEXT: ret
523523
%2 = add i8 %0, 1

llvm/test/CodeGen/AArch64/signed-truncation-check.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
396396
define i1 @add_ulecmp_bad_i16_i8(i16 %x) nounwind {
397397
; CHECK-LABEL: add_ulecmp_bad_i16_i8:
398398
; CHECK: // %bb.0:
399-
; CHECK-NEXT: mov w0, #1
399+
; CHECK-NEXT: mov w0, #1 // =0x1
400400
; CHECK-NEXT: ret
401401
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
402402
%tmp1 = icmp ule i16 %tmp0, -1 ; when we +1 it, it will wrap to 0

llvm/test/CodeGen/AArch64/typepromotion-overflow.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,8 @@ define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 {
246246
; CHECK-LABEL: safe_sub_var_imm:
247247
; CHECK: // %bb.0: // %entry
248248
; CHECK-NEXT: ldrb w8, [x0]
249-
; CHECK-NEXT: add w8, w8, #8
250-
; CHECK-NEXT: and w8, w8, #0xff
251-
; CHECK-NEXT: cmp w8, #252
249+
; CHECK-NEXT: sub w8, w8, #248
250+
; CHECK-NEXT: cmn w8, #4
252251
; CHECK-NEXT: cset w0, hi
253252
; CHECK-NEXT: ret
254253
entry:

llvm/test/CodeGen/RISCV/typepromotion-overflow.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,9 +283,8 @@ define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 {
283283
; CHECK-LABEL: safe_sub_var_imm:
284284
; CHECK: # %bb.0: # %entry
285285
; CHECK-NEXT: lbu a0, 0(a0)
286-
; CHECK-NEXT: addi a0, a0, 8
287-
; CHECK-NEXT: andi a0, a0, 255
288-
; CHECK-NEXT: sltiu a0, a0, 253
286+
; CHECK-NEXT: addi a0, a0, -248
287+
; CHECK-NEXT: sltiu a0, a0, -3
289288
; CHECK-NEXT: xori a0, a0, 1
290289
; CHECK-NEXT: ret
291290
entry:

llvm/test/Transforms/TypePromotion/ARM/icmps.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
define i32 @test_ult_254_inc_imm(i8 zeroext %x) {
55
; CHECK-LABEL: @test_ult_254_inc_imm(
66
; CHECK-NEXT: entry:
7-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], 1
8-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[ADD]], -2
7+
; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[X:%.*]] to i32
8+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], -255
9+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], -2
910
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
1011
; CHECK-NEXT: ret i32 [[RES]]
1112
;
@@ -368,7 +369,7 @@ if.end:
368369
define i32 @degenerateicmp() {
369370
; CHECK-LABEL: @degenerateicmp(
370371
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 190, 0
371-
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 225, [[TMP1]]
372+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 -31, [[TMP1]]
372373
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 0
373374
; CHECK-NEXT: ret i32 [[TMP3]]
374375
;

llvm/test/Transforms/TypePromotion/ARM/wrapping.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,9 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
8989

9090
define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
9191
; CHECK-LABEL: @overflow_add_positive_const_limit(
92-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[A:%.*]], 1
93-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[ADD]], -128
92+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
93+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], -255
94+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ADD]], -128
9495
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
9596
; CHECK-NEXT: ret i32 [[RES]]
9697
;
@@ -144,8 +145,9 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) {
144145

145146
define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
146147
; CHECK-LABEL: @overflow_sub_negative_const_limit(
147-
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A:%.*]], -1
148-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[SUB]], -128
148+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
149+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], 255
150+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], -128
149151
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
150152
; CHECK-NEXT: ret i32 [[RES]]
151153
;

0 commit comments

Comments
 (0)