@@ -136,6 +136,7 @@ class IRPromoter {
136
136
137
137
class TypePromotionImpl {
138
138
unsigned TypeSize = 0 ;
139
+ const TargetLowering *TLI = nullptr ;
139
140
LLVMContext *Ctx = nullptr ;
140
141
unsigned RegisterBitWidth = 0 ;
141
142
SmallPtrSet<Value *, 16 > AllVisited;
@@ -272,64 +273,58 @@ bool TypePromotionImpl::isSink(Value *V) {
272
273
273
274
/// Return whether this instruction can safely wrap.
274
275
bool TypePromotionImpl::isSafeWrap (Instruction *I) {
275
- // We can support a potentially wrapping instruction (I) if:
276
+ // We can support a potentially wrapping Add/Sub instruction (I) if:
276
277
// - It is only used by an unsigned icmp.
277
278
// - The icmp uses a constant.
278
- // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
279
- // around zero to become a larger number than before.
280
279
// - The wrapping instruction (I) also uses a constant.
281
280
//
282
- // We can then use the two constants to calculate whether the result would
283
- // wrap in respect to itself in the original bitwidth. If it doesn't wrap,
284
- // just underflows the range, the icmp would give the same result whether the
285
- // result has been truncated or not. We calculate this by:
286
- // - Zero extending both constants, if needed, to RegisterBitWidth.
287
- // - Take the absolute value of I's constant, adding this to the icmp const.
288
- // - Check that this value is not out of range for small type. If it is, it
289
- // means that it has underflowed enough to wrap around the icmp constant.
281
+ // This is a common pattern emitted to check if a value is within a range.
290
282
//
291
283
// For example:
292
284
//
293
- // %sub = sub i8 %a, 2
294
- // %cmp = icmp ule i8 %sub, 254
285
+ // %sub = sub i8 %a, C1
286
+ // %cmp = icmp ule i8 %sub, C2
287
+ //
288
+ // or
289
+ //
290
+ // %add = add i8 %a, C1
291
+ // %cmp = icmp ule i8 %add, C2.
295
292
//
296
- // If %a = 0, %sub = -2 == FE == 254
297
- // But if this is evalulated as a i32
298
- // %sub = -2 == FF FF FF FE == 4294967294
299
- // So the unsigned compares (i8 and i32) would not yield the same result .
293
+ // We will treat an add as though it were a subtract by -C1. To promote
294
+ // the Add/Sub we will zero extend the LHS and the subtracted amount. For Add,
295
+ // this means we need to negate the constant, zero extend to RegisterBitWidth,
296
+ // and negate in the larger type .
300
297
//
301
- // Another way to look at it is:
302
- // %a - 2 <= 254
303
- // %a + 2 <= 254 + 2
304
- // %a <= 256
305
- // And we can't represent 256 in the i8 format, so we don't support it.
298
+ // This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where
299
+ // C1 is the subtracted amount. This is either a small unsigned number or a
300
+ // large unsigned number in the promoted type.
306
301
//
307
- // Whereas:
302
+ // Now we need to correct the compare constant C2. Values >= C1 in the
303
+ // original add result range have been remapped to large values in the
304
+ // promoted range. If the compare constant fell into this range we need to
305
+ // remap it as well. We can do this as -(zext(-C2)).
308
306
//
309
- // %sub i8 %a, 1
307
+ // For example:
308
+ //
309
+ // %sub = sub i8 %a, 2
310
310
// %cmp = icmp ule i8 %sub, 254
311
311
//
312
- // If %a = 0, %sub = -1 == FF == 255
313
- // As i32:
314
- // %sub = -1 == FF FF FF FF == 4294967295
312
+ // becomes
315
313
//
316
- // In this case, the unsigned compare results would be the same and this
317
- // would also be true for ult, uge and ugt:
318
- // - (255 < 254) == (0xFFFFFFFF < 254) == false
319
- // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
320
- // - (255 > 254) == (0xFFFFFFFF > 254) == true
321
- // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
314
+ // %zext = zext %a to i32
315
+ // %sub = sub i32 %zext, 2
316
+ // %cmp = icmp ule i32 %sub, 4294967294
322
317
//
323
- // To demonstrate why we can't handle increasing values :
318
+ // Another example :
324
319
//
325
- // %add = add i8 %a, 2
326
- // %cmp = icmp ult i8 %add, 127
320
+ // %sub = sub i8 %a, 1
321
+ // %cmp = icmp ule i8 %sub, 254
327
322
//
328
- // If %a = 254, %add = 256 == (i8 1)
329
- // As i32:
330
- // %add = 256
323
+ // becomes
331
324
//
332
- // (1 < 127) != (256 < 127)
325
+ // %zext = zext %a to i32
326
+ // %sub = sub i32 %zext, 1
327
+ // %cmp = icmp ule i32 %sub, 254
333
328
334
329
unsigned Opc = I->getOpcode ();
335
330
if (Opc != Instruction::Add && Opc != Instruction::Sub)
@@ -356,21 +351,29 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) {
356
351
APInt OverflowConst = cast<ConstantInt>(I->getOperand (1 ))->getValue ();
357
352
if (Opc == Instruction::Sub)
358
353
OverflowConst = -OverflowConst;
359
- if (!OverflowConst.isNonPositive ())
360
- return false ;
354
+
355
+ // If the constant is positive, we will end up filling the promoted bits with
356
+ // all 1s. Make sure that results in a cheap add constant.
357
+ if (!OverflowConst.isNonPositive ()) {
358
+ // We don't have the true promoted width, just use 64 so we can create an
359
+ // int64_t for the isLegalAddImmediate call.
360
+ if (OverflowConst.getBitWidth () >= 64 )
361
+ return false ;
362
+
363
+ APInt NewConst = -((-OverflowConst).zext (64 ));
364
+ if (!TLI->isLegalAddImmediate (NewConst.getSExtValue ()))
365
+ return false ;
366
+ }
361
367
362
368
SafeWrap.insert (I);
363
369
364
- // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
365
- // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
366
- // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
367
- if (OverflowConst.sgt (ICmpConst)) {
368
- LLVM_DEBUG (dbgs () << " IR Promotion: Allowing safe overflow for sext "
370
+ if (OverflowConst == 0 || OverflowConst.ugt (ICmpConst)) {
371
+ LLVM_DEBUG (dbgs () << " IR Promotion: Allowing safe overflow for "
369
372
<< " const of " << *I << " \n " );
370
373
return true ;
371
374
}
372
375
373
- LLVM_DEBUG (dbgs () << " IR Promotion: Allowing safe overflow for sext "
376
+ LLVM_DEBUG (dbgs () << " IR Promotion: Allowing safe overflow for "
374
377
<< " const of " << *I << " and " << *CI << " \n " );
375
378
SafeWrap.insert (CI);
376
379
return true ;
@@ -487,18 +490,24 @@ void IRPromoter::PromoteTree() {
487
490
continue ;
488
491
489
492
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
490
- // For subtract, we don't need to sext the constant. We only put it in
493
+ // For subtract, we only need to zext the constant. We only put it in
491
494
// SafeWrap because SafeWrap.size() is used elsewhere.
492
- // For cmp, we need to sign extend a constant appearing in either
493
- // operand. For add, we should only sign extend the RHS.
494
- Constant *NewConst =
495
- ConstantInt::get (Const->getContext (),
496
- (SafeWrap.contains (I) &&
497
- (I->getOpcode () == Instruction::ICmp || i == 1 ) &&
498
- I->getOpcode () != Instruction::Sub)
499
- ? Const->getValue ().sext (PromotedWidth)
500
- : Const->getValue ().zext (PromotedWidth));
501
- I->setOperand (i, NewConst);
495
+ // For Add and ICmp we need to find how far the constant is from the
496
+ // top of its original unsigned range and place it the same distance
497
+ // from the top of its new unsigned range. We can do this by negating
498
+ // the constant, zero extending it, then negating in the new type.
499
+ APInt NewConst;
500
+ if (SafeWrap.contains (I)) {
501
+ if (I->getOpcode () == Instruction::ICmp)
502
+ NewConst = -((-Const->getValue ()).zext (PromotedWidth));
503
+ else if (I->getOpcode () == Instruction::Add && i == 1 )
504
+ NewConst = -((-Const->getValue ()).zext (PromotedWidth));
505
+ else
506
+ NewConst = Const->getValue ().zext (PromotedWidth);
507
+ } else
508
+ NewConst = Const->getValue ().zext (PromotedWidth);
509
+
510
+ I->setOperand (i, ConstantInt::get (Const->getContext (), NewConst));
502
511
} else if (isa<UndefValue>(Op))
503
512
I->setOperand (i, ConstantInt::get (ExtTy, 0 ));
504
513
}
@@ -917,7 +926,7 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
917
926
bool MadeChange = false ;
918
927
const DataLayout &DL = F.getParent ()->getDataLayout ();
919
928
const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl (F);
920
- const TargetLowering * TLI = SubtargetInfo->getTargetLowering ();
929
+ TLI = SubtargetInfo->getTargetLowering ();
921
930
RegisterBitWidth =
922
931
TTI.getRegisterBitWidth (TargetTransformInfo::RGK_Scalar).getFixedValue ();
923
932
Ctx = &F.getParent ()->getContext ();
0 commit comments