Skip to content

Commit b289dc5

Browse files
committed
[CVP] Narrow SDiv/SRem to the smallest power-of-2 that's sufficient to contain its operands
This is practically identical to what we already do for UDiv/URem: https://rise4fun.com/Alive/04K Name: narrow udiv Pre: C0 u<= 255 && C1 u<= 255 %r = udiv i16 C0, C1 => %t0 = trunc i16 C0 to i8 %t1 = trunc i16 C1 to i8 %t2 = udiv i8 %t0, %t1 %r = zext i8 %t2 to i16 Name: narrow exact udiv Pre: C0 u<= 255 && C1 u<= 255 %r = udiv exact i16 C0, C1 => %t0 = trunc i16 C0 to i8 %t1 = trunc i16 C1 to i8 %t2 = udiv exact i8 %t0, %t1 %r = zext i8 %t2 to i16 Name: narrow urem Pre: C0 u<= 255 && C1 u<= 255 %r = urem i16 C0, C1 => %t0 = trunc i16 C0 to i8 %t1 = trunc i16 C1 to i8 %t2 = urem i8 %t0, %t1 %r = zext i8 %t2 to i16 ... only here we need to look for 'min signed bits', not 'active bits', and there's an UB to be aware of: https://rise4fun.com/Alive/KG86 https://rise4fun.com/Alive/LwR Name: narrow sdiv Pre: C0 <= 127 && C1 <= 127 && C0 >= -128 && C1 >= -128 %r = sdiv i16 C0, C1 => %t0 = trunc i16 C0 to i9 %t1 = trunc i16 C1 to i9 %t2 = sdiv i9 %t0, %t1 %r = sext i9 %t2 to i16 Name: narrow exact sdiv Pre: C0 <= 127 && C1 <= 127 && C0 >= -128 && C1 >= -128 %r = sdiv exact i16 C0, C1 => %t0 = trunc i16 C0 to i9 %t1 = trunc i16 C1 to i9 %t2 = sdiv exact i9 %t0, %t1 %r = sext i9 %t2 to i16 Name: narrow srem Pre: C0 <= 127 && C1 <= 127 && C0 >= -128 && C1 >= -128 %r = srem i16 C0, C1 => %t0 = trunc i16 C0 to i9 %t1 = trunc i16 C1 to i9 %t2 = srem i9 %t0, %t1 %r = sext i9 %t2 to i16 Name: narrow sdiv Pre: C0 <= 127 && C1 <= 127 && C0 >= -128 && C1 >= -128 && !(C0 == -128 && C1 == -1) %r = sdiv i16 C0, C1 => %t0 = trunc i16 C0 to i8 %t1 = trunc i16 C1 to i8 %t2 = sdiv i8 %t0, %t1 %r = sext i8 %t2 to i16 Name: narrow exact sdiv Pre: C0 <= 127 && C1 <= 127 && C0 >= -128 && C1 >= -128 && !(C0 == -128 && C1 == -1) %r = sdiv exact i16 C0, C1 => %t0 = trunc i16 C0 to i8 %t1 = trunc i16 C1 to i8 %t2 = sdiv exact i8 %t0, %t1 %r = sext i8 %t2 to i16 Name: narrow srem Pre: C0 <= 127 && C1 <= 127 && C0 >= -128 && C1 >= -128 && !(C0 == -128 && C1 == -1) %r = srem i16 C0, C1 => %t0 = trunc 
i16 C0 to i8 %t1 = trunc i16 C1 to i8 %t2 = srem i8 %t0, %t1 %r = sext i8 %t2 to i16 The ConstantRangeTest.losslessSignedTruncationSignext test sanity-checks the logic, that we can losslessly truncate ConstantRange to `getMinSignedBits()` and signext it back, and it will be identical to the original CR. On vanilla llvm test-suite + RawSpeed, this fires 1262 times, while the same fold for UDiv/URem only fires 384 times. Sic! Additionally, this causes +606.18% (+1079) extra cases of aggressive-instcombine.NumDAGsReduced, and +473.14% (+1145) of aggressive-instcombine.NumInstrsReduced folds.
1 parent cb10d5d commit b289dc5

File tree

3 files changed

+171
-41
lines changed

3 files changed

+171
-41
lines changed

llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ STATISTIC(NumMemAccess, "Number of memory access targets propagated");
5858
STATISTIC(NumCmps, "Number of comparisons propagated");
5959
STATISTIC(NumReturns, "Number of return values propagated");
6060
STATISTIC(NumDeadCases, "Number of switch cases removed");
61+
STATISTIC(NumSDivSRemsNarrowed,
62+
"Number of sdivs/srems whose width was decreased");
6163
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
6264
STATISTIC(NumUDivURemsNarrowed,
6365
"Number of udivs/urems whose width was decreased");
@@ -624,6 +626,60 @@ Domain getDomain(Value *V, LazyValueInfo *LVI, Instruction *CxtI) {
624626
return Domain::Unknown;
625627
};
626628

629+
/// Try to shrink a sdiv/srem's width down to the smallest power of two that's
630+
/// sufficient to contain its operands.
631+
static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
632+
assert(Instr->getOpcode() == Instruction::SDiv ||
633+
Instr->getOpcode() == Instruction::SRem);
634+
if (Instr->getType()->isVectorTy())
635+
return false;
636+
637+
// Find the smallest power of two bitwidth that's sufficient to hold Instr's
638+
// operands.
639+
unsigned OrigWidth = Instr->getType()->getIntegerBitWidth();
640+
641+
// What is the smallest bit width that can accomodate the entire value ranges
642+
// of both of the operands?
643+
std::array<Optional<ConstantRange>, 2> CRs;
644+
unsigned MinSignedBits = 0;
645+
for (auto I : zip(Instr->operands(), CRs)) {
646+
std::get<1>(I) = LVI->getConstantRange(std::get<0>(I), Instr->getParent());
647+
MinSignedBits = std::max(std::get<1>(I)->getMinSignedBits(), MinSignedBits);
648+
}
649+
650+
// sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can
651+
// prove that such a combination is impossible, we need to bump the bitwidth.
652+
if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) &&
653+
CRs[0]->contains(
654+
APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
655+
++MinSignedBits;
656+
657+
// Don't shrink below 8 bits wide.
658+
unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8);
659+
660+
// NewWidth might be greater than OrigWidth if OrigWidth is not a power of
661+
// two.
662+
if (NewWidth >= OrigWidth)
663+
return false;
664+
665+
++NumSDivSRemsNarrowed;
666+
IRBuilder<> B{Instr};
667+
auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
668+
auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
669+
Instr->getName() + ".lhs.trunc");
670+
auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
671+
Instr->getName() + ".rhs.trunc");
672+
auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName());
673+
auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext");
674+
if (auto *BinOp = dyn_cast<BinaryOperator>(BO))
675+
if (BinOp->getOpcode() == Instruction::SDiv)
676+
BinOp->setIsExact(Instr->isExact());
677+
678+
Instr->replaceAllUsesWith(Sext);
679+
Instr->eraseFromParent();
680+
return true;
681+
}
682+
627683
/// Try to shrink a udiv/urem's width down to the smallest power of two that's
628684
/// sufficient to contain its operands.
629685
static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
@@ -669,6 +725,7 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
669725
}
670726

671727
static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
728+
assert(SDI->getOpcode() == Instruction::SRem);
672729
if (SDI->getType()->isVectorTy())
673730
return false;
674731

@@ -724,6 +781,7 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) {
724781
/// conditions, this can sometimes prove conditions instcombine can't by
725782
/// exploiting range information.
726783
static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
784+
assert(SDI->getOpcode() == Instruction::SDiv);
727785
if (SDI->getType()->isVectorTy())
728786
return false;
729787

@@ -774,6 +832,23 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
774832
return true;
775833
}
776834

835+
/// Common entry point for scalar sdiv/srem instructions.
///
/// First runs the opcode-specific handler (processSDiv for sdiv, processSRem
/// for srem). If that handler reports a change, this returns true right away;
/// otherwise it falls back to narrowSDivOrSRem. Vector-typed instructions are
/// rejected up front.
///
/// \returns true if any of the attempted transforms changed the IR.
static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
  assert(Instr->getOpcode() == Instruction::SDiv ||
         Instr->getOpcode() == Instruction::SRem);
  if (Instr->getType()->isVectorTy())
    return false;

  bool HandledByOpcode = false;
  switch (Instr->getOpcode()) {
  case Instruction::SDiv:
    HandledByOpcode = processSDiv(Instr, LVI);
    break;
  case Instruction::SRem:
    HandledByOpcode = processSRem(Instr, LVI);
    break;
  default:
    break;
  }

  // Only try narrowing when the opcode-specific handler did nothing.
  if (HandledByOpcode)
    return true;

  return narrowSDivOrSRem(Instr, LVI);
}
851+
777852
static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
778853
if (SDI->getType()->isVectorTy())
779854
return false;
@@ -935,10 +1010,8 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
9351010
BBChanged |= processCallSite(cast<CallBase>(*II), LVI);
9361011
break;
9371012
case Instruction::SRem:
938-
BBChanged |= processSRem(cast<BinaryOperator>(II), LVI);
939-
break;
9401013
case Instruction::SDiv:
941-
BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI);
1014+
BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI);
9421015
break;
9431016
case Instruction::UDiv:
9441017
case Instruction::URem:

llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,11 @@ define i64 @test11_i15_i15(i64 %x, i64 %y) {
271271
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
272272
; CHECK-NEXT: br label [[END:%.*]]
273273
; CHECK: end:
274-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
275-
; CHECK-NEXT: ret i64 [[DIV]]
274+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
275+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
276+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
277+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
278+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
276279
;
277280
entry:
278281
%c0 = icmp sle i64 %x, 16383
@@ -306,8 +309,11 @@ define i64 @test12_i16_i16(i64 %x, i64 %y) {
306309
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
307310
; CHECK-NEXT: br label [[END:%.*]]
308311
; CHECK: end:
309-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
310-
; CHECK-NEXT: ret i64 [[DIV]]
312+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i32
313+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i32
314+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i32 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
315+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i32 [[DIV1]] to i64
316+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
311317
;
312318
entry:
313319
%c0 = icmp sle i64 %x, 32767
@@ -338,8 +344,11 @@ define i64 @test13_i16_u15(i64 %x, i64 %y) {
338344
; CHECK-NEXT: call void @llvm.assume(i1 [[C2]])
339345
; CHECK-NEXT: br label [[END:%.*]]
340346
; CHECK: end:
341-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
342-
; CHECK-NEXT: ret i64 [[DIV]]
347+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
348+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
349+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
350+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
351+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
343352
;
344353
entry:
345354
%c0 = icmp sle i64 %x, 32767
@@ -371,8 +380,11 @@ define i64 @test14_i16safe_i16(i64 %x, i64 %y) {
371380
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
372381
; CHECK-NEXT: br label [[END:%.*]]
373382
; CHECK: end:
374-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
375-
; CHECK-NEXT: ret i64 [[DIV]]
383+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
384+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
385+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
386+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
387+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
376388
;
377389
entry:
378390
%c0 = icmp sle i64 %x, 32767
@@ -403,8 +415,11 @@ define i64 @test15_i16safe_u15(i64 %x, i64 %y) {
403415
; CHECK-NEXT: call void @llvm.assume(i1 [[C2]])
404416
; CHECK-NEXT: br label [[END:%.*]]
405417
; CHECK: end:
406-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
407-
; CHECK-NEXT: ret i64 [[DIV]]
418+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
419+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
420+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
421+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
422+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
408423
;
409424
entry:
410425
%c0 = icmp sle i64 %x, 32767
@@ -435,8 +450,11 @@ define i64 @test16_i4_i4(i64 %x, i64 %y) {
435450
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
436451
; CHECK-NEXT: br label [[END:%.*]]
437452
; CHECK: end:
438-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
439-
; CHECK-NEXT: ret i64 [[DIV]]
453+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i8
454+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i8
455+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
456+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i8 [[DIV1]] to i64
457+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
440458
;
441459
entry:
442460
%c0 = icmp sle i64 %x, 3
@@ -469,8 +487,11 @@ define i64 @test17_i9_i9(i64 %x, i64 %y) {
469487
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
470488
; CHECK-NEXT: br label [[END:%.*]]
471489
; CHECK: end:
472-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
473-
; CHECK-NEXT: ret i64 [[DIV]]
490+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
491+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
492+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
493+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
494+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
474495
;
475496
entry:
476497
%c0 = icmp sle i64 %x, 255
@@ -569,8 +590,11 @@ define i64 @test20_i16_i18(i64 %x, i64 %y) {
569590
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
570591
; CHECK-NEXT: br label [[END:%.*]]
571592
; CHECK: end:
572-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
573-
; CHECK-NEXT: ret i64 [[DIV]]
593+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i32
594+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i32
595+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i32 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
596+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i32 [[DIV1]] to i64
597+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
574598
;
575599
entry:
576600
%c0 = icmp sle i64 %x, 16383
@@ -601,8 +625,11 @@ define i64 @test21_i18_i16(i64 %x, i64 %y) {
601625
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
602626
; CHECK-NEXT: br label [[END:%.*]]
603627
; CHECK: end:
604-
; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[X]], [[Y]]
605-
; CHECK-NEXT: ret i64 [[DIV]]
628+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i32
629+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i32
630+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv i32 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
631+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i32 [[DIV1]] to i64
632+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
606633
;
607634
entry:
608635
%c0 = icmp sle i64 %x, 65535
@@ -635,8 +662,11 @@ define i64 @test22_i16_i16(i64 %x, i64 %y) {
635662
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
636663
; CHECK-NEXT: br label [[END:%.*]]
637664
; CHECK: end:
638-
; CHECK-NEXT: [[DIV:%.*]] = sdiv exact i64 [[X]], [[Y]]
639-
; CHECK-NEXT: ret i64 [[DIV]]
665+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i32
666+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i32
667+
; CHECK-NEXT: [[DIV1:%.*]] = sdiv exact i32 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
668+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i32 [[DIV1]] to i64
669+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
640670
;
641671
entry:
642672
%c0 = icmp sle i64 %x, 32767

llvm/test/Transforms/CorrelatedValuePropagation/srem.ll

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,11 @@ define i64 @test11_i15_i15(i64 %x, i64 %y) {
184184
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
185185
; CHECK-NEXT: br label [[END:%.*]]
186186
; CHECK: end:
187-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
188-
; CHECK-NEXT: ret i64 [[DIV]]
187+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
188+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
189+
; CHECK-NEXT: [[DIV1:%.*]] = srem i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
190+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
191+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
189192
;
190193
entry:
191194
%c0 = icmp sle i64 %x, 16383
@@ -219,8 +222,11 @@ define i64 @test12_i16_i16(i64 %x, i64 %y) {
219222
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
220223
; CHECK-NEXT: br label [[END:%.*]]
221224
; CHECK: end:
222-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
223-
; CHECK-NEXT: ret i64 [[DIV]]
225+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i32
226+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i32
227+
; CHECK-NEXT: [[DIV1:%.*]] = srem i32 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
228+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i32 [[DIV1]] to i64
229+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
224230
;
225231
entry:
226232
%c0 = icmp sle i64 %x, 32767
@@ -251,8 +257,11 @@ define i64 @test13_i16_u15(i64 %x, i64 %y) {
251257
; CHECK-NEXT: call void @llvm.assume(i1 [[C2]])
252258
; CHECK-NEXT: br label [[END:%.*]]
253259
; CHECK: end:
254-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
255-
; CHECK-NEXT: ret i64 [[DIV]]
260+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
261+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
262+
; CHECK-NEXT: [[DIV1:%.*]] = srem i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
263+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
264+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
256265
;
257266
entry:
258267
%c0 = icmp sle i64 %x, 32767
@@ -284,8 +293,11 @@ define i64 @test14_i16safe_i16(i64 %x, i64 %y) {
284293
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
285294
; CHECK-NEXT: br label [[END:%.*]]
286295
; CHECK: end:
287-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
288-
; CHECK-NEXT: ret i64 [[DIV]]
296+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
297+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
298+
; CHECK-NEXT: [[DIV1:%.*]] = srem i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
299+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
300+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
289301
;
290302
entry:
291303
%c0 = icmp sle i64 %x, 32767
@@ -316,8 +328,11 @@ define i64 @test15_i16safe_u15(i64 %x, i64 %y) {
316328
; CHECK-NEXT: call void @llvm.assume(i1 [[C2]])
317329
; CHECK-NEXT: br label [[END:%.*]]
318330
; CHECK: end:
319-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
320-
; CHECK-NEXT: ret i64 [[DIV]]
331+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
332+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
333+
; CHECK-NEXT: [[DIV1:%.*]] = srem i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
334+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
335+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
321336
;
322337
entry:
323338
%c0 = icmp sle i64 %x, 32767
@@ -348,8 +363,11 @@ define i64 @test16_i4_i4(i64 %x, i64 %y) {
348363
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
349364
; CHECK-NEXT: br label [[END:%.*]]
350365
; CHECK: end:
351-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
352-
; CHECK-NEXT: ret i64 [[DIV]]
366+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i8
367+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i8
368+
; CHECK-NEXT: [[DIV1:%.*]] = srem i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
369+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i8 [[DIV1]] to i64
370+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
353371
;
354372
entry:
355373
%c0 = icmp sle i64 %x, 3
@@ -382,8 +400,11 @@ define i64 @test17_i9_i9(i64 %x, i64 %y) {
382400
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
383401
; CHECK-NEXT: br label [[END:%.*]]
384402
; CHECK: end:
385-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
386-
; CHECK-NEXT: ret i64 [[DIV]]
403+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i16
404+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i16
405+
; CHECK-NEXT: [[DIV1:%.*]] = srem i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
406+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i16 [[DIV1]] to i64
407+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
387408
;
388409
entry:
389410
%c0 = icmp sle i64 %x, 255
@@ -482,8 +503,11 @@ define i64 @test20_i16_i18(i64 %x, i64 %y) {
482503
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
483504
; CHECK-NEXT: br label [[END:%.*]]
484505
; CHECK: end:
485-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
486-
; CHECK-NEXT: ret i64 [[DIV]]
506+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i32
507+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i32
508+
; CHECK-NEXT: [[DIV1:%.*]] = srem i32 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
509+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i32 [[DIV1]] to i64
510+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
487511
;
488512
entry:
489513
%c0 = icmp sle i64 %x, 16383
@@ -514,8 +538,11 @@ define i64 @test21_i18_i16(i64 %x, i64 %y) {
514538
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
515539
; CHECK-NEXT: br label [[END:%.*]]
516540
; CHECK: end:
517-
; CHECK-NEXT: [[DIV:%.*]] = srem i64 [[X]], [[Y]]
518-
; CHECK-NEXT: ret i64 [[DIV]]
541+
; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i64 [[X]] to i32
542+
; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i64 [[Y]] to i32
543+
; CHECK-NEXT: [[DIV1:%.*]] = srem i32 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]]
544+
; CHECK-NEXT: [[DIV_SEXT:%.*]] = sext i32 [[DIV1]] to i64
545+
; CHECK-NEXT: ret i64 [[DIV_SEXT]]
519546
;
520547
entry:
521548
%c0 = icmp sle i64 %x, 65535

0 commit comments

Comments
 (0)