Skip to content

[TypePromotion] Support positive addition amounts in isSafeWrap. #81690

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 67 additions & 58 deletions llvm/lib/CodeGen/TypePromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ class IRPromoter {

class TypePromotionImpl {
unsigned TypeSize = 0;
const TargetLowering *TLI = nullptr;
LLVMContext *Ctx = nullptr;
unsigned RegisterBitWidth = 0;
SmallPtrSet<Value *, 16> AllVisited;
Expand Down Expand Up @@ -272,64 +273,58 @@ bool TypePromotionImpl::isSink(Value *V) {

/// Return whether this instruction can safely wrap.
bool TypePromotionImpl::isSafeWrap(Instruction *I) {
// We can support a potentially wrapping instruction (I) if:
// We can support a potentially wrapping Add/Sub instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
// - The wrapping value (I) is decreasing, i.e would underflow - wrapping
// around zero to become a larger number than before.
// - The wrapping instruction (I) also uses a constant.
//
// We can then use the two constants to calculate whether the result would
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
// just underflows the range, the icmp would give the same result whether the
// result has been truncated or not. We calculate this by:
// - Zero extending both constants, if needed, to RegisterBitWidth.
// - Take the absolute value of I's constant, adding this to the icmp const.
// - Check that this value is not out of range for small type. If it is, it
// means that it has underflowed enough to wrap around the icmp constant.
// This a common pattern emitted to check if a value is within a range.
//
// For example:
//
// %sub = sub i8 %a, 2
// %cmp = icmp ule i8 %sub, 254
// %sub = sub i8 %a, C1
// %cmp = icmp ule i8 %sub, C2
//
// or
//
// %add = add i8 %a, C1
// %cmp = icmp ule i8 %add, C2.
//
// If %a = 0, %sub = -2 == FE == 254
// But if this is evalulated as a i32
// %sub = -2 == FF FF FF FE == 4294967294
// So the unsigned compares (i8 and i32) would not yield the same result.
// We will treat an add as though it were a subtract by -C1. To promote
// the Add/Sub we will zero extend the LHS and the subtracted amount. For Add,
// this means we need to negate the constant, zero extend to RegisterBitWidth,
// and negate in the larger type.
//
// Another way to look at it is:
// %a - 2 <= 254
// %a + 2 <= 254 + 2
// %a <= 256
// And we can't represent 256 in the i8 format, so we don't support it.
// This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where
// C1 is the subtracted amount. This is either a small unsigned number or a
// large unsigned number in the promoted type.
//
// Whereas:
// Now we need to correct the compare constant C2. Values >= C1 in the
// original add result range have been remapped to large values in the
// promoted range. If the compare constant fell into this range we need to
// remap it as well. We can do this as -(zext(-C2)).
//
// %sub i8 %a, 1
// For example:
//
// %sub = sub i8 %a, 2
// %cmp = icmp ule i8 %sub, 254
//
// If %a = 0, %sub = -1 == FF == 255
// As i32:
// %sub = -1 == FF FF FF FF == 4294967295
// becomes
//
// In this case, the unsigned compare results would be the same and this
// would also be true for ult, uge and ugt:
// - (255 < 254) == (0xFFFFFFFF < 254) == false
// - (255 <= 254) == (0xFFFFFFFF <= 254) == false
// - (255 > 254) == (0xFFFFFFFF > 254) == true
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
// %zext = zext %a to i32
// %sub = sub i32 %zext, 2
// %cmp = icmp ule i32 %sub, 4294967294
//
// To demonstrate why we can't handle increasing values:
// Another example:
//
// %add = add i8 %a, 2
// %cmp = icmp ult i8 %add, 127
// %sub = sub i8 %a, 1
// %cmp = icmp ule i8 %sub, 254
//
// If %a = 254, %add = 256 == (i8 1)
// As i32:
// %add = 256
// becomes
//
// (1 < 127) != (256 < 127)
// %zext = zext %a to i32
// %sub = sub i32 %zext, 1
// %cmp = icmp ule i32 %sub, 254

unsigned Opc = I->getOpcode();
if (Opc != Instruction::Add && Opc != Instruction::Sub)
Expand All @@ -356,15 +351,23 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) {
APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
if (Opc == Instruction::Sub)
OverflowConst = -OverflowConst;
if (!OverflowConst.isNonPositive())
return false;

// If the constant is positive, we will end up filling the promoted bits with
// all 1s. Make sure that results in a cheap add constant.
if (!OverflowConst.isNonPositive()) {
// We don't have the true promoted width, just use 64 so we can create an
// int64_t for the isLegalAddImmediate call.
if (OverflowConst.getBitWidth() >= 64)
return false;

APInt NewConst = -((-OverflowConst).zext(64));
if (!TLI->isLegalAddImmediate(NewConst.getSExtValue()))
return false;
}

SafeWrap.insert(I);

// Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
// zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
// zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
if (OverflowConst.sgt(ICmpConst)) {
if (OverflowConst.ugt(ICmpConst)) {
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
<< "const of " << *I << "\n");
return true;
Expand Down Expand Up @@ -487,18 +490,24 @@ void IRPromoter::PromoteTree() {
continue;

if (auto *Const = dyn_cast<ConstantInt>(Op)) {
// For subtract, we don't need to sext the constant. We only put it in
// For subtract, we only need to zext the constant. We only put it in
// SafeWrap because SafeWrap.size() is used elsewhere.
// For cmp, we need to sign extend a constant appearing in either
// operand. For add, we should only sign extend the RHS.
Constant *NewConst =
ConstantInt::get(Const->getContext(),
(SafeWrap.contains(I) &&
(I->getOpcode() == Instruction::ICmp || i == 1) &&
I->getOpcode() != Instruction::Sub)
? Const->getValue().sext(PromotedWidth)
: Const->getValue().zext(PromotedWidth));
I->setOperand(i, NewConst);
// For Add and ICmp we need to find how far the constant is from the
// top of its original unsigned range and place it the same distance
// from the top of its new unsigned range. We can do this by negating
// the constant, zero extending it, then negating in the new type.
APInt NewConst;
if (SafeWrap.contains(I)) {
if (I->getOpcode() == Instruction::ICmp)
NewConst = -((-Const->getValue()).zext(PromotedWidth));
else if (I->getOpcode() == Instruction::Add && i == 1)
NewConst = -((-Const->getValue()).zext(PromotedWidth));
else
NewConst = Const->getValue().zext(PromotedWidth);
} else
NewConst = Const->getValue().zext(PromotedWidth);

I->setOperand(i, ConstantInt::get(Const->getContext(), NewConst));
} else if (isa<UndefValue>(Op))
I->setOperand(i, ConstantInt::get(ExtTy, 0));
}
Expand Down Expand Up @@ -917,7 +926,7 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F);
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
TLI = SubtargetInfo->getTargetLowering();
RegisterBitWidth =
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue();
Ctx = &F.getParent()->getContext();
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AArch64/and-mask-removal.ll
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,8 @@ if.end: ; preds = %if.then, %entry
define zeroext i1 @test8_0(i8 zeroext %x) align 2 {
; CHECK-LABEL: test8_0:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: add w8, w0, #74
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: cmp w8, #236
; CHECK-NEXT: sub w8, w0, #182
; CHECK-NEXT: cmn w8, #20
; CHECK-NEXT: cset w0, lo
; CHECK-NEXT: ret
entry:
Expand Down Expand Up @@ -508,16 +507,17 @@ define i64 @pr58109(i8 signext %0) {
define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) {
; CHECK-SD-LABEL: pr58109b:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: add w8, w0, #1
; CHECK-SD-NEXT: tst w8, #0xfe
; CHECK-SD-NEXT: csel x0, x1, x2, eq
; CHECK-SD-NEXT: and w8, w0, #0xff
; CHECK-SD-NEXT: sub w8, w8, #255
; CHECK-SD-NEXT: cmn w8, #254
; CHECK-SD-NEXT: csel x0, x1, x2, lo
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: pr58109b:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: add w8, w0, #1
; CHECK-GI-NEXT: and w8, w8, #0xff
; CHECK-GI-NEXT: cmp w8, #2
; CHECK-GI-NEXT: mov w8, #-255 ; =0xffffff01
; CHECK-GI-NEXT: add w8, w8, w0, uxtb
; CHECK-GI-NEXT: cmn w8, #254
; CHECK-GI-NEXT: csel x0, x1, x2, lo
; CHECK-GI-NEXT: ret
%2 = add i8 %0, 1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/signed-truncation-check.ll
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
define i1 @add_ulecmp_bad_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ulecmp_bad_i16_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #1
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ule i16 %tmp0, -1 ; when we +1 it, it will wrap to 0
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,8 @@ define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 {
; CHECK-LABEL: safe_sub_var_imm:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: add w8, w8, #8
; CHECK-NEXT: and w8, w8, #0xff
; CHECK-NEXT: cmp w8, #252
; CHECK-NEXT: sub w8, w8, #248
; CHECK-NEXT: cmn w8, #4
; CHECK-NEXT: cset w0, hi
; CHECK-NEXT: ret
entry:
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/RISCV/typepromotion-overflow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,8 @@ define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 {
; CHECK-LABEL: safe_sub_var_imm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lbu a0, 0(a0)
; CHECK-NEXT: addi a0, a0, 8
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: sltiu a0, a0, 253
; CHECK-NEXT: addi a0, a0, -248
; CHECK-NEXT: sltiu a0, a0, -3
; CHECK-NEXT: xori a0, a0, 1
; CHECK-NEXT: ret
entry:
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/Transforms/TypePromotion/ARM/icmps.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
define i32 @test_ult_254_inc_imm(i8 zeroext %x) {
; CHECK-LABEL: @test_ult_254_inc_imm(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[ADD]], -2
; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[X:%.*]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], -255
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], -2
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
; CHECK-NEXT: ret i32 [[RES]]
;
Expand Down Expand Up @@ -368,7 +369,7 @@ if.end:
define i32 @degenerateicmp() {
; CHECK-LABEL: @degenerateicmp(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 190, 0
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 225, [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 -31, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 0
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand Down
10 changes: 6 additions & 4 deletions llvm/test/Transforms/TypePromotion/ARM/wrapping.ll
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,9 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {

define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
; CHECK-LABEL: @overflow_add_positive_const_limit(
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[A:%.*]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[ADD]], -128
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], -255
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[ADD]], -128
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
; CHECK-NEXT: ret i32 [[RES]]
;
Expand Down Expand Up @@ -144,8 +145,9 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) {

define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
; CHECK-LABEL: @overflow_sub_negative_const_limit(
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A:%.*]], -1
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[SUB]], -128
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], 255
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], -128
; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
; CHECK-NEXT: ret i32 [[RES]]
;
Expand Down