Skip to content

Commit 884b051

Browse files
committed
Recommit "[TypePromotion] Support positive addition amounts in isSafeWrap. (#81690)"
With special case with Add constant is 0. Original message: We can support these by changing the sext promotion to -zext(-C) and replacing a sgt check with ugt. Reframing the logic in terms of how the unsigned range are affected. More comments in the patch. The new cases check isLegalAddImmediate to avoid some regressions in lit tests.
1 parent a25fa92 commit 884b051

File tree

7 files changed

+92
-82
lines changed

7 files changed

+92
-82
lines changed

llvm/lib/CodeGen/TypePromotion.cpp

Lines changed: 69 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ class IRPromoter {
136136

137137
class TypePromotionImpl {
138138
unsigned TypeSize = 0;
139+
const TargetLowering *TLI = nullptr;
139140
LLVMContext *Ctx = nullptr;
140141
unsigned RegisterBitWidth = 0;
141142
SmallPtrSet<Value *, 16> AllVisited;
@@ -272,64 +273,58 @@ bool TypePromotionImpl::isSink(Value *V) {
272273

273274
/// Return whether this instruction can safely wrap.
274275
bool TypePromotionImpl::isSafeWrap(Instruction *I) {
275-
// We can support a potentially wrapping instruction (I) if:
276+
// We can support a potentially wrapping Add/Sub instruction (I) if:
276277
// - It is only used by an unsigned icmp.
277278
// - The icmp uses a constant.
278-
// - The wrapping value (I) is decreasing, i.e would underflow - wrapping
279-
// around zero to become a larger number than before.
280279
// - The wrapping instruction (I) also uses a constant.
281280
//
282-
// We can then use the two constants to calculate whether the result would
283-
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
284-
// just underflows the range, the icmp would give the same result whether the
285-
// result has been truncated or not. We calculate this by:
286-
// - Zero extending both constants, if needed, to RegisterBitWidth.
287-
// - Take the absolute value of I's constant, adding this to the icmp const.
288-
// - Check that this value is not out of range for small type. If it is, it
289-
// means that it has underflowed enough to wrap around the icmp constant.
281+
// This a common pattern emitted to check if a value is within a range.
290282
//
291283
// For example:
292284
//
293-
// %sub = sub i8 %a, 2
294-
// %cmp = icmp ule i8 %sub, 254
285+
// %sub = sub i8 %a, C1
286+
// %cmp = icmp ule i8 %sub, C2
287+
//
288+
// or
289+
//
290+
// %add = add i8 %a, C1
291+
// %cmp = icmp ule i8 %add, C2.
295292
//
296-
// If %a = 0, %sub = -2 == FE == 254
297-
// But if this is evalulated as a i32
298-
// %sub = -2 == FF FF FF FE == 4294967294
299-
// So the unsigned compares (i8 and i32) would not yield the same result.
293+
// We will treat an add as though it were a subtract by -C1. To promote
294+
// the Add/Sub we will zero extend the LHS and the subtracted amount. For Add,
295+
// this means we need to negate the constant, zero extend to RegisterBitWidth,
296+
// and negate in the larger type.
300297
//
301-
// Another way to look at it is:
302-
// %a - 2 <= 254
303-
// %a + 2 <= 254 + 2
304-
// %a <= 256
305-
// And we can't represent 256 in the i8 format, so we don't support it.
298+
// This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where
299+
// C1 is the subtracted amount. This is either a small unsigned number or a
300+
// large unsigned number in the promoted type.
306301
//
307-
// Whereas:
302+
// Now we need to correct the compare constant C2. Values >= C1 in the
303+
// original add result range have been remapped to large values in the
304+
// promoted range. If the compare constant fell into this range we need to
305+
// remap it as well. We can do this as -(zext(-C2)).
308306
//
309-
// %sub i8 %a, 1
307+
// For example:
308+
//
309+
// %sub = sub i8 %a, 2
310310
// %cmp = icmp ule i8 %sub, 254
311311
//
312-
// If %a = 0, %sub = -1 == FF == 255
313-
// As i32:
314-
// %sub = -1 == FF FF FF FF == 4294967295
312+
// becomes
315313
//
316-
// In this case, the unsigned compare results would be the same and this
317-
// would also be true for ult, uge and ugt:
318-
// - (255 < 254) == (0xFFFFFFFF < 254) == false
319-
// - (255 <= 254) == (0xFFFFFFFF <= 254) == false
320-
// - (255 > 254) == (0xFFFFFFFF > 254) == true
321-
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
314+
// %zext = zext %a to i32
315+
// %sub = sub i32 %zext, 2
316+
// %cmp = icmp ule i32 %sub, 4294967294
322317
//
323-
// To demonstrate why we can't handle increasing values:
318+
// Another example:
324319
//
325-
// %add = add i8 %a, 2
326-
// %cmp = icmp ult i8 %add, 127
320+
// %sub = sub i8 %a, 1
321+
// %cmp = icmp ule i8 %sub, 254
327322
//
328-
// If %a = 254, %add = 256 == (i8 1)
329-
// As i32:
330-
// %add = 256
323+
// becomes
331324
//
332-
// (1 < 127) != (256 < 127)
325+
// %zext = zext %a to i32
326+
// %sub = sub i32 %zext, 1
327+
// %cmp = icmp ule i32 %sub, 254
333328

334329
unsigned Opc = I->getOpcode();
335330
if (Opc != Instruction::Add && Opc != Instruction::Sub)
@@ -356,21 +351,29 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) {
356351
APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
357352
if (Opc == Instruction::Sub)
358353
OverflowConst = -OverflowConst;
359-
if (!OverflowConst.isNonPositive())
360-
return false;
354+
355+
// If the constant is positive, we will end up filling the promoted bits with
356+
// all 1s. Make sure that results in a cheap add constant.
357+
if (!OverflowConst.isNonPositive()) {
358+
// We don't have the true promoted width, just use 64 so we can create an
359+
// int64_t for the isLegalAddImmediate call.
360+
if (OverflowConst.getBitWidth() >= 64)
361+
return false;
362+
363+
APInt NewConst = -((-OverflowConst).zext(64));
364+
if (!TLI->isLegalAddImmediate(NewConst.getSExtValue()))
365+
return false;
366+
}
361367

362368
SafeWrap.insert(I);
363369

364-
// Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
365-
// zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
366-
// zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
367-
if (OverflowConst.sgt(ICmpConst)) {
368-
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
370+
if (OverflowConst == 0 || OverflowConst.ugt(ICmpConst)) {
371+
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
369372
<< "const of " << *I << "\n");
370373
return true;
371374
}
372375

373-
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
376+
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
374377
<< "const of " << *I << " and " << *CI << "\n");
375378
SafeWrap.insert(CI);
376379
return true;
@@ -487,18 +490,24 @@ void IRPromoter::PromoteTree() {
487490
continue;
488491

489492
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
490-
// For subtract, we don't need to sext the constant. We only put it in
493+
// For subtract, we only need to zext the constant. We only put it in
491494
// SafeWrap because SafeWrap.size() is used elsewhere.
492-
// For cmp, we need to sign extend a constant appearing in either
493-
// operand. For add, we should only sign extend the RHS.
494-
Constant *NewConst =
495-
ConstantInt::get(Const->getContext(),
496-
(SafeWrap.contains(I) &&
497-
(I->getOpcode() == Instruction::ICmp || i == 1) &&
498-
I->getOpcode() != Instruction::Sub)
499-
? Const->getValue().sext(PromotedWidth)
500-
: Const->getValue().zext(PromotedWidth));
501-
I->setOperand(i, NewConst);
495+
// For Add and ICmp we need to find how far the constant is from the
496+
// top of its original unsigned range and place it the same distance
497+
// from the top of its new unsigned range. We can do this by negating
498+
// the constant, zero extending it, then negating in the new type.
499+
APInt NewConst;
500+
if (SafeWrap.contains(I)) {
501+
if (I->getOpcode() == Instruction::ICmp)
502+
NewConst = -((-Const->getValue()).zext(PromotedWidth));
503+
else if (I->getOpcode() == Instruction::Add && i == 1)
504+
NewConst = -((-Const->getValue()).zext(PromotedWidth));
505+
else
506+
NewConst = Const->getValue().zext(PromotedWidth);
507+
} else
508+
NewConst = Const->getValue().zext(PromotedWidth);
509+
510+
I->setOperand(i, ConstantInt::get(Const->getContext(), NewConst));
502511
} else if (isa<UndefValue>(Op))
503512
I->setOperand(i, ConstantInt::get(ExtTy, 0));
504513
}
@@ -917,7 +926,7 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
917926
bool MadeChange = false;
918927
const DataLayout &DL = F.getParent()->getDataLayout();
919928
const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F);
920-
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
929+
TLI = SubtargetInfo->getTargetLowering();
921930
RegisterBitWidth =
922931
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue();
923932
Ctx = &F.getParent()->getContext();

llvm/test/CodeGen/AArch64/and-mask-removal.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,8 @@ if.end: ; preds = %if.then, %entry
6565
define zeroext i1 @test8_0(i8 zeroext %x) align 2 {
6666
; CHECK-LABEL: test8_0:
6767
; CHECK: ; %bb.0: ; %entry
68-
; CHECK-NEXT: add w8, w0, #74
69-
; CHECK-NEXT: and w8, w8, #0xff
70-
; CHECK-NEXT: cmp w8, #236
68+
; CHECK-NEXT: sub w8, w0, #182
69+
; CHECK-NEXT: cmn w8, #20
7170
; CHECK-NEXT: cset w0, lo
7271
; CHECK-NEXT: ret
7372
entry:
@@ -508,16 +507,17 @@ define i64 @pr58109(i8 signext %0) {
508507
define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) {
509508
; CHECK-SD-LABEL: pr58109b:
510509
; CHECK-SD: ; %bb.0:
511-
; CHECK-SD-NEXT: add w8, w0, #1
512-
; CHECK-SD-NEXT: tst w8, #0xfe
513-
; CHECK-SD-NEXT: csel x0, x1, x2, eq
510+
; CHECK-SD-NEXT: and w8, w0, #0xff
511+
; CHECK-SD-NEXT: sub w8, w8, #255
512+
; CHECK-SD-NEXT: cmn w8, #254
513+
; CHECK-SD-NEXT: csel x0, x1, x2, lo
514514
; CHECK-SD-NEXT: ret
515515
;
516516
; CHECK-GI-LABEL: pr58109b:
517517
; CHECK-GI: ; %bb.0:
518-
; CHECK-GI-NEXT: add w8, w0, #1
519-
; CHECK-GI-NEXT: and w8, w8, #0xff
520-
; CHECK-GI-NEXT: cmp w8, #2
518+
; CHECK-GI-NEXT: mov w8, #-255 ; =0xffffff01
519+
; CHECK-GI-NEXT: add w8, w8, w0, uxtb
520+
; CHECK-GI-NEXT: cmn w8, #254
521521
; CHECK-GI-NEXT: csel x0, x1, x2, lo
522522
; CHECK-GI-NEXT: ret
523523
%2 = add i8 %0, 1

llvm/test/CodeGen/AArch64/signed-truncation-check.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
396396
define i1 @add_ulecmp_bad_i16_i8(i16 %x) nounwind {
397397
; CHECK-LABEL: add_ulecmp_bad_i16_i8:
398398
; CHECK: // %bb.0:
399-
; CHECK-NEXT: mov w0, #1
399+
; CHECK-NEXT: mov w0, #1 // =0x1
400400
; CHECK-NEXT: ret
401401
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
402402
%tmp1 = icmp ule i16 %tmp0, -1 ; when we +1 it, it will wrap to 0

llvm/test/CodeGen/AArch64/typepromotion-overflow.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,8 @@ define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 {
246246
; CHECK-LABEL: safe_sub_var_imm:
247247
; CHECK: // %bb.0: // %entry
248248
; CHECK-NEXT: ldrb w8, [x0]
249-
; CHECK-NEXT: add w8, w8, #8
250-
; CHECK-NEXT: and w8, w8, #0xff
251-
; CHECK-NEXT: cmp w8, #252
249+
; CHECK-NEXT: sub w8, w8, #248
250+
; CHECK-NEXT: cmn w8, #4
252251
; CHECK-NEXT: cset w0, hi
253252
; CHECK-NEXT: ret
254253
entry:

llvm/test/CodeGen/RISCV/typepromotion-overflow.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,9 +283,8 @@ define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 {
283283
; CHECK-LABEL: safe_sub_var_imm:
284284
; CHECK: # %bb.0: # %entry
285285
; CHECK-NEXT: lbu a0, 0(a0)
286-
; CHECK-NEXT: addi a0, a0, 8
287-
; CHECK-NEXT: andi a0, a0, 255
288-
; CHECK-NEXT: sltiu a0, a0, 253
286+
; CHECK-NEXT: addi a0, a0, -248
287+
; CHECK-NEXT: sltiu a0, a0, -3
289288
; CHECK-NEXT: xori a0, a0, 1
290289
; CHECK-NEXT: ret
291290
entry:

llvm/test/Transforms/TypePromotion/ARM/icmps.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
define i32 @test_ult_254_inc_imm(i8 zeroext %x) {
55
; CHECK-LABEL: @test_ult_254_inc_imm(
66
; CHECK-NEXT: entry:
7-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], 1
8-
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[ADD]], -2
7+
; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[X:%.*]] to i32
8+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], -255
9+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], -2
910
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
1011
; CHECK-NEXT: ret i32 [[RES]]
1112
;

llvm/test/Transforms/TypePromotion/ARM/wrapping.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,9 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
8989

9090
define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
9191
; CHECK-LABEL: @overflow_add_positive_const_limit(
92-
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[A:%.*]], 1
93-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[ADD]], -128
92+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
93+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], -255
94+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ADD]], -128
9495
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
9596
; CHECK-NEXT: ret i32 [[RES]]
9697
;
@@ -144,8 +145,9 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) {
144145

145146
define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
146147
; CHECK-LABEL: @overflow_sub_negative_const_limit(
147-
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A:%.*]], -1
148-
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[SUB]], -128
148+
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
149+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], 255
150+
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], -128
149151
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
150152
; CHECK-NEXT: ret i32 [[RES]]
151153
;

0 commit comments

Comments
 (0)