Revert "[TypePromotion] Support positive addition amounts in isSafeWrap. (#81690)"

topperc · topperc · commit 561ddb1687c2 · 2024-03-11T00:51:21.000-07:00
This reverts commit 0813b90. Fixes miscompile reported in #84718.
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -136,7 +136,6 @@ class IRPromoter {
 
 class TypePromotionImpl {
   unsigned TypeSize = 0;
-  const TargetLowering *TLI = nullptr;
   LLVMContext *Ctx = nullptr;
   unsigned RegisterBitWidth = 0;
   SmallPtrSet<Value *, 16> AllVisited;
@@ -273,58 +272,64 @@ bool TypePromotionImpl::isSink(Value *V) {
 
 /// Return whether this instruction can safely wrap.
 bool TypePromotionImpl::isSafeWrap(Instruction *I) {
-  // We can support a potentially wrapping Add/Sub instruction (I) if:
+  // We can support a potentially wrapping instruction (I) if:
   // - It is only used by an unsigned icmp.
   // - The icmp uses a constant.
+  // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
+  //   around zero to become a larger number than before.
   // - The wrapping instruction (I) also uses a constant.
   //
-  // This a common pattern emitted to check if a value is within a range.
+  // We can then use the two constants to calculate whether the result would
+  // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
+  // just underflows the range, the icmp would give the same result whether the
+  // result has been truncated or not. We calculate this by:
+  // - Zero extending both constants, if needed, to RegisterBitWidth.
+  // - Take the absolute value of I's constant, adding this to the icmp const.
+  // - Check that this value is not out of range for small type. If it is, it
+  //   means that it has underflowed enough to wrap around the icmp constant.
   //
   // For example:
   //
-  // %sub = sub i8 %a, C1
-  // %cmp = icmp ule i8 %sub, C2
-  //
-  // or
-  //
-  // %add = add i8 %a, C1
-  // %cmp = icmp ule i8 %add, C2.
-  //
-  // We will treat an add as though it were a subtract by -C1. To promote
-  // the Add/Sub we will zero extend the LHS and the subtracted amount. For Add,
-  // this means we need to negate the constant, zero extend to RegisterBitWidth,
-  // and negate in the larger type.
+  // %sub = sub i8 %a, 2
+  // %cmp = icmp ule i8 %sub, 254
   //
-  // This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where
-  // C1 is the subtracted amount. This is either a small unsigned number or a
-  // large unsigned number in the promoted type.
+  // If %a = 0, %sub = -2 == FE == 254
+  // But if this is evalulated as a i32
+  // %sub = -2 == FF FF FF FE == 4294967294
+  // So the unsigned compares (i8 and i32) would not yield the same result.
   //
-  // Now we need to correct the compare constant C2. Values >= C1 in the
-  // original add result range have been remapped to large values in the
-  // promoted range. If the compare constant fell into this range we need to
-  // remap it as well. We can do this as -(zext(-C2)).
+  // Another way to look at it is:
+  // %a - 2 <= 254
+  // %a + 2 <= 254 + 2
+  // %a <= 256
+  // And we can't represent 256 in the i8 format, so we don't support it.
   //
-  // For example:
+  // Whereas:
   //
-  // %sub = sub i8 %a, 2
+  // %sub i8 %a, 1
   // %cmp = icmp ule i8 %sub, 254
   //
-  // becomes
+  // If %a = 0, %sub = -1 == FF == 255
+  // As i32:
+  // %sub = -1 == FF FF FF FF == 4294967295
   //
-  // %zext = zext %a to i32
-  // %sub = sub i32 %zext, 2
-  // %cmp = icmp ule i32 %sub, 4294967294
+  // In this case, the unsigned compare results would be the same and this
+  // would also be true for ult, uge and ugt:
+  // - (255 < 254) == (0xFFFFFFFF < 254) == false
+  // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
+  // - (255 > 254) == (0xFFFFFFFF > 254) == true
+  // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
   //
-  // Another example:
+  // To demonstrate why we can't handle increasing values:
   //
-  // %sub = sub i8 %a, 1
-  // %cmp = icmp ule i8 %sub, 254
+  // %add = add i8 %a, 2
+  // %cmp = icmp ult i8 %add, 127
   //
-  // becomes
+  // If %a = 254, %add = 256 == (i8 1)
+  // As i32:
+  // %add = 256
   //
-  // %zext = zext %a to i32
-  // %sub = sub i32 %zext, 1
-  // %cmp = icmp ule i32 %sub, 254
+  // (1 < 127) != (256 < 127)
 
   unsigned Opc = I->getOpcode();
   if (Opc != Instruction::Add && Opc != Instruction::Sub)
@@ -351,23 +356,15 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) {
   APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
   if (Opc == Instruction::Sub)
     OverflowConst = -OverflowConst;
-
-  // If the constant is positive, we will end up filling the promoted bits with
-  // all 1s. Make sure that results in a cheap add constant.
-  if (!OverflowConst.isNonPositive()) {
-    // We don't have the true promoted width, just use 64 so we can create an
-    // int64_t for the isLegalAddImmediate call.
-    if (OverflowConst.getBitWidth() >= 64)
-      return false;
-
-    APInt NewConst = -((-OverflowConst).zext(64));
-    if (!TLI->isLegalAddImmediate(NewConst.getSExtValue()))
-      return false;
-  }
+  if (!OverflowConst.isNonPositive())
+    return false;
 
   SafeWrap.insert(I);
 
-  if (OverflowConst.ugt(ICmpConst)) {
+  // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
+  //   zext(x) + sext(C1) <u zext(C2)  if C1 < 0 and C1 >s C2
+  //   zext(x) + sext(C1) <u sext(C2)  if C1 < 0 and C1 <=s C2
+  if (OverflowConst.sgt(ICmpConst)) {
     LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
                       << "const of " << *I << "\n");
     return true;
@@ -490,24 +487,18 @@ void IRPromoter::PromoteTree() {
         continue;
 
       if (auto *Const = dyn_cast<ConstantInt>(Op)) {
-        // For subtract, we only need to zext the constant. We only put it in
+        // For subtract, we don't need to sext the constant. We only put it in
         // SafeWrap because SafeWrap.size() is used elsewhere.
-        // For Add and ICmp we need to find how far the constant is from the
-        // top of its original unsigned range and place it the same distance
-        // from the top of its new unsigned range. We can do this by negating
-        // the constant, zero extending it, then negating in the new type.
-        APInt NewConst;
-        if (SafeWrap.contains(I)) {
-          if (I->getOpcode() == Instruction::ICmp)
-            NewConst = -((-Const->getValue()).zext(PromotedWidth));
-          else if (I->getOpcode() == Instruction::Add && i == 1)
-            NewConst = -((-Const->getValue()).zext(PromotedWidth));
-          else
-            NewConst = Const->getValue().zext(PromotedWidth);
-        } else
-          NewConst = Const->getValue().zext(PromotedWidth);
-
-        I->setOperand(i, ConstantInt::get(Const->getContext(), NewConst));
+        // For cmp, we need to sign extend a constant appearing in either
+        // operand. For add, we should only sign extend the RHS.
+        Constant *NewConst =
+            ConstantInt::get(Const->getContext(),
+                             (SafeWrap.contains(I) &&
+                              (I->getOpcode() == Instruction::ICmp || i == 1) &&
+                              I->getOpcode() != Instruction::Sub)
+                                 ? Const->getValue().sext(PromotedWidth)
+                                 : Const->getValue().zext(PromotedWidth));
+        I->setOperand(i, NewConst);
       } else if (isa<UndefValue>(Op))
         I->setOperand(i, ConstantInt::get(ExtTy, 0));
     }
@@ -926,7 +917,7 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
   bool MadeChange = false;
   const DataLayout &DL = F.getParent()->getDataLayout();
   const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl(F);
-  TLI = SubtargetInfo->getTargetLowering();
+  const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
   RegisterBitWidth =
       TTI.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedValue();
   Ctx = &F.getParent()->getContext();
diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -65,8 +65,9 @@ if.end:                                           ; preds = %if.then, %entry
 define zeroext i1 @test8_0(i8 zeroext %x)  align 2 {
 ; CHECK-LABEL: test8_0:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    sub w8, w0, #182
-; CHECK-NEXT:    cmn w8, #20
+; CHECK-NEXT:    add w8, w0, #74
+; CHECK-NEXT:    and w8, w8, #0xff
+; CHECK-NEXT:    cmp w8, #236
 ; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
 entry:
@@ -507,17 +508,16 @@ define i64 @pr58109(i8 signext %0) {
 define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) {
 ; CHECK-SD-LABEL: pr58109b:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:    and w8, w0, #0xff
-; CHECK-SD-NEXT:    sub w8, w8, #255
-; CHECK-SD-NEXT:    cmn w8, #254
-; CHECK-SD-NEXT:    csel x0, x1, x2, lo
+; CHECK-SD-NEXT:    add w8, w0, #1
+; CHECK-SD-NEXT:    tst w8, #0xfe
+; CHECK-SD-NEXT:    csel x0, x1, x2, eq
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: pr58109b:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    mov w8, #-255 ; =0xffffff01
-; CHECK-GI-NEXT:    add w8, w8, w0, uxtb
-; CHECK-GI-NEXT:    cmn w8, #254
+; CHECK-GI-NEXT:    add w8, w0, #1
+; CHECK-GI-NEXT:    and w8, w8, #0xff
+; CHECK-GI-NEXT:    cmp w8, #2
 ; CHECK-GI-NEXT:    csel x0, x1, x2, lo
 ; CHECK-GI-NEXT:    ret
   %2 = add i8 %0, 1
diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -396,7 +396,7 @@ define i1 @add_ultcmp_bad_i24_i8(i24 %x) nounwind {
 define i1 @add_ulecmp_bad_i16_i8(i16 %x) nounwind {
 ; CHECK-LABEL: add_ulecmp_bad_i16_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w0, #1
 ; CHECK-NEXT:    ret
   %tmp0 = add i16 %x, 128 ; 1U << (8-1)
   %tmp1 = icmp ule i16 %tmp0, -1 ; when we +1 it, it will wrap to 0
diff --git a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
@@ -246,8 +246,9 @@ define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 {
 ; CHECK-LABEL: safe_sub_var_imm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    sub w8, w8, #248
-; CHECK-NEXT:    cmn w8, #4
+; CHECK-NEXT:    add w8, w8, #8
+; CHECK-NEXT:    and w8, w8, #0xff
+; CHECK-NEXT:    cmp w8, #252
 ; CHECK-NEXT:    cset w0, hi
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll
@@ -283,8 +283,9 @@ define i32 @safe_sub_var_imm(ptr nocapture readonly %b) local_unnamed_addr #1 {
 ; CHECK-LABEL: safe_sub_var_imm:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lbu a0, 0(a0)
-; CHECK-NEXT:    addi a0, a0, -248
-; CHECK-NEXT:    sltiu a0, a0, -3
+; CHECK-NEXT:    addi a0, a0, 8
+; CHECK-NEXT:    andi a0, a0, 255
+; CHECK-NEXT:    sltiu a0, a0, 253
 ; CHECK-NEXT:    xori a0, a0, 1
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/Transforms/TypePromotion/ARM/icmps.ll b/llvm/test/Transforms/TypePromotion/ARM/icmps.ll
@@ -4,9 +4,8 @@
 define i32 @test_ult_254_inc_imm(i8 zeroext %x) {
 ; CHECK-LABEL: @test_ult_254_inc_imm(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], -255
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ADD]], -2
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[ADD]], -2
 ; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 35, i32 47
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
@@ -369,7 +368,7 @@ if.end:
 define i32 @degenerateicmp() {
 ; CHECK-LABEL: @degenerateicmp(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 190, 0
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 -31, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 225, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 0
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
diff --git a/llvm/test/Transforms/TypePromotion/ARM/wrapping.ll b/llvm/test/Transforms/TypePromotion/ARM/wrapping.ll
@@ -89,9 +89,8 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
 
 define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
 ; CHECK-LABEL: @overflow_add_positive_const_limit(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP1]], -255
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[ADD]], -128
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[A:%.*]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[ADD]], -128
 ; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
@@ -145,9 +144,8 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) {
 
 define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
 ; CHECK-LABEL: @overflow_sub_negative_const_limit(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[A:%.*]] to i32
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[TMP1]], 255
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SUB]], -128
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 [[A:%.*]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SUB]], -128
 ; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 8, i32 16
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;