Skip to content

Commit b7dbeec

Browse files
committed
[InstCombine] lshr (mul (X, 2^N + 1)), N -> add (X, lshr(X, N))
Alive2 proofs: https://alive2.llvm.org/ce/z/eSinJY and https://alive2.llvm.org/ce/z/sweDgc
1 parent 1bf045e commit b7dbeec

File tree

3 files changed

+69
-33
lines changed

3 files changed

+69
-33
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,13 +1457,24 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
14571457

14581458
const APInt *MulC;
14591459
if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
1460-
// Look for a "splat" mul pattern - it replicates bits across each half of
1461-
// a value, so a right shift is just a mask of the low bits:
1462-
// lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
1463-
// TODO: Generalize to allow more than just half-width shifts?
1464-
if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
1465-
MulC->logBase2() == ShAmtC)
1466-
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
1460+
if (BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
1461+
MulC->logBase2() == ShAmtC) {
1462+
// Look for a "splat" mul pattern - it replicates bits across each half
1463+
// of a value, so a right shift is just a mask of the low bits:
1464+
// lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
1465+
if (ShAmtC * 2 == BitWidth)
1466+
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
1467+
1468+
// lshr (mul nuw (X, 2^N + 1)), N -> add nuw (X, lshr(X, N))
1469+
if (Op0->hasOneUse()) {
1470+
auto *NewAdd = BinaryOperator::CreateNUWAdd(
1471+
X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "",
1472+
I.isExact()));
1473+
NewAdd->setHasNoSignedWrap(
1474+
cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap());
1475+
return NewAdd;
1476+
}
1477+
}
14671478

14681479
// The one-use check is not strictly necessary, but codegen may not be
14691480
// able to invert the transform and perf may suffer with an extra mul
@@ -1483,6 +1494,16 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
14831494
}
14841495
}
14851496

1497+
// lshr (mul nsw (X, 2^N + 1)), N -> add nsw (X, lshr(X, N))
1498+
if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(MulC))))) {
1499+
if (BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
1500+
MulC->logBase2() == ShAmtC) {
1501+
return BinaryOperator::CreateNSWAdd(
1502+
X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "",
1503+
I.isExact()));
1504+
}
1505+
}
1506+
14861507
// Try to narrow bswap.
14871508
// In the case where the shift amount equals the bitwidth difference, the
14881509
// shift is eliminated.
@@ -1686,6 +1707,21 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
16861707
if (match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
16871708
return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);
16881709
}
1710+
1711+
const APInt *MulC;
1712+
if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(MulC)))) &&
1713+
(BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
1714+
MulC->logBase2() == ShAmt &&
1715+
(ShAmt < BitWidth - 1))) /* Minus 1 for the sign bit */ {
1716+
1717+
// ashr (mul nsw (X, 2^N + 1)), N -> add nsw (X, ashr(X, N))
1718+
auto *NewAdd = BinaryOperator::CreateNSWAdd(
1719+
X,
1720+
Builder.CreateAShr(X, ConstantInt::get(Ty, ShAmt), "", I.isExact()));
1721+
NewAdd->setHasNoUnsignedWrap(
1722+
cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap());
1723+
return NewAdd;
1724+
}
16891725
}
16901726

16911727
const SimplifyQuery Q = SQ.getWithInstruction(&I);

llvm/test/Transforms/InstCombine/ashr-lshr.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -607,8 +607,8 @@ define <2 x i8> @ashr_known_pos_exact_vec(<2 x i8> %x, <2 x i8> %y) {
607607

608608
define i32 @lshr_mul_times_3_div_2(i32 %0) {
609609
; CHECK-LABEL: @lshr_mul_times_3_div_2(
610-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[TMP0:%.*]], 3
611-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[MUL]], 1
610+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 1
611+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
612612
; CHECK-NEXT: ret i32 [[LSHR]]
613613
;
614614
%mul = mul nsw nuw i32 %0, 3
@@ -618,8 +618,8 @@ define i32 @lshr_mul_times_3_div_2(i32 %0) {
618618

619619
define i32 @lshr_mul_times_3_div_2_exact(i32 %x) {
620620
; CHECK-LABEL: @lshr_mul_times_3_div_2_exact(
621-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 3
622-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 1
621+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1
622+
; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
623623
; CHECK-NEXT: ret i32 [[LSHR]]
624624
;
625625
%mul = mul nsw i32 %x, 3
@@ -657,8 +657,8 @@ define i32 @mul_times_3_div_2_multiuse_lshr(i32 %x) {
657657

658658
define i32 @lshr_mul_times_3_div_2_exact_2(i32 %x) {
659659
; CHECK-LABEL: @lshr_mul_times_3_div_2_exact_2(
660-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 3
661-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 1
660+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1
661+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]]
662662
; CHECK-NEXT: ret i32 [[LSHR]]
663663
;
664664
%mul = mul nuw i32 %x, 3
@@ -668,8 +668,8 @@ define i32 @lshr_mul_times_3_div_2_exact_2(i32 %x) {
668668

669669
define i32 @lshr_mul_times_5_div_4(i32 %0) {
670670
; CHECK-LABEL: @lshr_mul_times_5_div_4(
671-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[TMP0:%.*]], 5
672-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[MUL]], 2
671+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 2
672+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
673673
; CHECK-NEXT: ret i32 [[LSHR]]
674674
;
675675
%mul = mul nsw nuw i32 %0, 5
@@ -679,8 +679,8 @@ define i32 @lshr_mul_times_5_div_4(i32 %0) {
679679

680680
define i32 @lshr_mul_times_5_div_4_exact(i32 %x) {
681681
; CHECK-LABEL: @lshr_mul_times_5_div_4_exact(
682-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 5
683-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 2
682+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2
683+
; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
684684
; CHECK-NEXT: ret i32 [[LSHR]]
685685
;
686686
%mul = mul nsw i32 %x, 5
@@ -718,8 +718,8 @@ define i32 @mul_times_5_div_4_multiuse_lshr(i32 %x) {
718718

719719
define i32 @lshr_mul_times_5_div_4_exact_2(i32 %x) {
720720
; CHECK-LABEL: @lshr_mul_times_5_div_4_exact_2(
721-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 5
722-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 2
721+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2
722+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]]
723723
; CHECK-NEXT: ret i32 [[LSHR]]
724724
;
725725
%mul = mul nuw i32 %x, 5
@@ -729,8 +729,8 @@ define i32 @lshr_mul_times_5_div_4_exact_2(i32 %x) {
729729

730730
define i32 @ashr_mul_times_3_div_2(i32 %0) {
731731
; CHECK-LABEL: @ashr_mul_times_3_div_2(
732-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[TMP0:%.*]], 3
733-
; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[MUL]], 1
732+
; CHECK-NEXT: [[TMP2:%.*]] = ashr i32 [[TMP0:%.*]], 1
733+
; CHECK-NEXT: [[ASHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
734734
; CHECK-NEXT: ret i32 [[ASHR]]
735735
;
736736
%mul = mul nuw nsw i32 %0, 3
@@ -740,8 +740,8 @@ define i32 @ashr_mul_times_3_div_2(i32 %0) {
740740

741741
define i32 @ashr_mul_times_3_div_2_exact(i32 %x) {
742742
; CHECK-LABEL: @ashr_mul_times_3_div_2_exact(
743-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 3
744-
; CHECK-NEXT: [[ASHR:%.*]] = ashr exact i32 [[MUL]], 1
743+
; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i32 [[X:%.*]], 1
744+
; CHECK-NEXT: [[ASHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
745745
; CHECK-NEXT: ret i32 [[ASHR]]
746746
;
747747
%mul = mul nsw i32 %x, 3
@@ -792,8 +792,8 @@ define i32 @mul_times_3_div_2_multiuse_ashr(i32 %x) {
792792

793793
define i32 @ashr_mul_times_3_div_2_exact_2(i32 %x) {
794794
; CHECK-LABEL: @ashr_mul_times_3_div_2_exact_2(
795-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 3
796-
; CHECK-NEXT: [[ASHR:%.*]] = ashr exact i32 [[MUL]], 1
795+
; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i32 [[X:%.*]], 1
796+
; CHECK-NEXT: [[ASHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
797797
; CHECK-NEXT: ret i32 [[ASHR]]
798798
;
799799
%mul = mul nsw i32 %x, 3
@@ -803,8 +803,8 @@ define i32 @ashr_mul_times_3_div_2_exact_2(i32 %x) {
803803

804804
define i32 @ashr_mul_times_5_div_4(i32 %0) {
805805
; CHECK-LABEL: @ashr_mul_times_5_div_4(
806-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[TMP0:%.*]], 5
807-
; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[MUL]], 2
806+
; CHECK-NEXT: [[TMP2:%.*]] = ashr i32 [[TMP0:%.*]], 2
807+
; CHECK-NEXT: [[ASHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
808808
; CHECK-NEXT: ret i32 [[ASHR]]
809809
;
810810
%mul = mul nuw nsw i32 %0, 5
@@ -814,8 +814,8 @@ define i32 @ashr_mul_times_5_div_4(i32 %0) {
814814

815815
define i32 @ashr_mul_times_5_div_4_exact(i32 %x) {
816816
; CHECK-LABEL: @ashr_mul_times_5_div_4_exact(
817-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 5
818-
; CHECK-NEXT: [[ASHR:%.*]] = ashr exact i32 [[MUL]], 2
817+
; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i32 [[X:%.*]], 2
818+
; CHECK-NEXT: [[ASHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
819819
; CHECK-NEXT: ret i32 [[ASHR]]
820820
;
821821
%mul = mul nsw i32 %x, 5
@@ -853,8 +853,8 @@ define i32 @mul_times_5_div_4_multiuse_ashr(i32 %x) {
853853

854854
define i32 @ashr_mul_times_5_div_4_exact_2(i32 %x) {
855855
; CHECK-LABEL: @ashr_mul_times_5_div_4_exact_2(
856-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 5
857-
; CHECK-NEXT: [[ASHR:%.*]] = ashr exact i32 [[MUL]], 2
856+
; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i32 [[X:%.*]], 2
857+
; CHECK-NEXT: [[ASHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
858858
; CHECK-NEXT: ret i32 [[ASHR]]
859859
;
860860
%mul = mul nsw i32 %x, 5

llvm/test/Transforms/InstCombine/lshr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,8 @@ define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
632632

633633
define i32 @mul_splat_fold_no_nuw(i32 %x) {
634634
; CHECK-LABEL: @mul_splat_fold_no_nuw(
635-
; CHECK-NEXT: [[M:%.*]] = mul nsw i32 [[X:%.*]], 65537
636-
; CHECK-NEXT: [[T:%.*]] = lshr i32 [[M]], 16
635+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
636+
; CHECK-NEXT: [[T:%.*]] = add nsw i32 [[TMP1]], [[X]]
637637
; CHECK-NEXT: ret i32 [[T]]
638638
;
639639
%m = mul nsw i32 %x, 65537

0 commit comments

Comments
 (0)