Skip to content

Commit cdfbd2e

Browse files
committed
[InstCombine] lshr (mul nuw/nsw (X, 2^N + 1)), N -> add nuw/nsw (X, lshr(X, N))
Alive2 Proof: https://alive2.llvm.org/ce/z/eSinJY
1 parent a145ff1 commit cdfbd2e

File tree

3 files changed

+42
-21
lines changed

3 files changed

+42
-21
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,13 +1457,24 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
14571457

14581458
const APInt *MulC;
14591459
if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC)))) {
1460-
// Look for a "splat" mul pattern - it replicates bits across each half of
1461-
// a value, so a right shift is just a mask of the low bits:
1462-
// lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
1463-
// TODO: Generalize to allow more than just half-width shifts?
1464-
if (BitWidth > 2 && ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
1465-
MulC->logBase2() == ShAmtC)
1466-
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
1460+
if (BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
1461+
MulC->logBase2() == ShAmtC) {
1462+
// Look for a "splat" mul pattern - it replicates bits across each half
1463+
// of a value, so a right shift is just a mask of the low bits:
1464+
// lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
1465+
if (ShAmtC * 2 == BitWidth)
1466+
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
1467+
1468+
// lshr (mul nuw (X, 2^N + 1)), N -> add nuw (X, lshr(X, N))
1469+
if (Op0->hasOneUse()) {
1470+
auto *NewAdd = BinaryOperator::CreateNUWAdd(
1471+
X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "",
1472+
I.isExact()));
1473+
NewAdd->setHasNoSignedWrap(
1474+
cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap());
1475+
return NewAdd;
1476+
}
1477+
}
14671478

14681479
// The one-use check is not strictly necessary, but codegen may not be
14691480
// able to invert the transform and perf may suffer with an extra mul
@@ -1483,6 +1494,16 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
14831494
}
14841495
}
14851496

1497+
// lshr (mul nsw (X, 2^N + 1)), N -> add nsw (X, lshr(X, N))
1498+
if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(MulC))))) {
1499+
if (BitWidth > 2 && (*MulC - 1).isPowerOf2() &&
1500+
MulC->logBase2() == ShAmtC) {
1501+
return BinaryOperator::CreateNSWAdd(
1502+
X, Builder.CreateLShr(X, ConstantInt::get(Ty, ShAmtC), "",
1503+
I.isExact()));
1504+
}
1505+
}
1506+
14861507
// Try to narrow bswap.
14871508
// In the case where the shift amount equals the bitwidth difference, the
14881509
// shift is eliminated.

llvm/test/Transforms/InstCombine/ashr-lshr.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -607,8 +607,8 @@ define <2 x i8> @ashr_known_pos_exact_vec(<2 x i8> %x, <2 x i8> %y) {
607607

608608
define i32 @lshr_mul_times_3_div_2(i32 %0) {
609609
; CHECK-LABEL: @lshr_mul_times_3_div_2(
610-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[TMP0:%.*]], 3
611-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[MUL]], 1
610+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 1
611+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
612612
; CHECK-NEXT: ret i32 [[LSHR]]
613613
;
614614
%mul = mul nsw nuw i32 %0, 3
@@ -618,8 +618,8 @@ define i32 @lshr_mul_times_3_div_2(i32 %0) {
618618

619619
define i32 @lshr_mul_times_3_div_2_exact(i32 %x) {
620620
; CHECK-LABEL: @lshr_mul_times_3_div_2_exact(
621-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 3
622-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 1
621+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1
622+
; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
623623
; CHECK-NEXT: ret i32 [[LSHR]]
624624
;
625625
%mul = mul nsw i32 %x, 3
@@ -657,8 +657,8 @@ define i32 @mul_times_3_div_2_multiuse_lshr(i32 %x) {
657657

658658
define i32 @lshr_mul_times_3_div_2_exact_2(i32 %x) {
659659
; CHECK-LABEL: @lshr_mul_times_3_div_2_exact_2(
660-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 3
661-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 1
660+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 1
661+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]]
662662
; CHECK-NEXT: ret i32 [[LSHR]]
663663
;
664664
%mul = mul nuw i32 %x, 3
@@ -668,8 +668,8 @@ define i32 @lshr_mul_times_3_div_2_exact_2(i32 %x) {
668668

669669
define i32 @lshr_mul_times_5_div_4(i32 %0) {
670670
; CHECK-LABEL: @lshr_mul_times_5_div_4(
671-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[TMP0:%.*]], 5
672-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[MUL]], 2
671+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 2
672+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP2]], [[TMP0]]
673673
; CHECK-NEXT: ret i32 [[LSHR]]
674674
;
675675
%mul = mul nsw nuw i32 %0, 5
@@ -679,8 +679,8 @@ define i32 @lshr_mul_times_5_div_4(i32 %0) {
679679

680680
define i32 @lshr_mul_times_5_div_4_exact(i32 %x) {
681681
; CHECK-LABEL: @lshr_mul_times_5_div_4_exact(
682-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 5
683-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 2
682+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2
683+
; CHECK-NEXT: [[LSHR:%.*]] = add nsw i32 [[TMP1]], [[X]]
684684
; CHECK-NEXT: ret i32 [[LSHR]]
685685
;
686686
%mul = mul nsw i32 %x, 5
@@ -718,8 +718,8 @@ define i32 @mul_times_5_div_4_multiuse_lshr(i32 %x) {
718718

719719
define i32 @lshr_mul_times_5_div_4_exact_2(i32 %x) {
720720
; CHECK-LABEL: @lshr_mul_times_5_div_4_exact_2(
721-
; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 5
722-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[MUL]], 2
721+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 2
722+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X]]
723723
; CHECK-NEXT: ret i32 [[LSHR]]
724724
;
725725
%mul = mul nuw i32 %x, 5

llvm/test/Transforms/InstCombine/lshr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,8 @@ define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
632632

633633
define i32 @mul_splat_fold_no_nuw(i32 %x) {
634634
; CHECK-LABEL: @mul_splat_fold_no_nuw(
635-
; CHECK-NEXT: [[M:%.*]] = mul nsw i32 [[X:%.*]], 65537
636-
; CHECK-NEXT: [[T:%.*]] = lshr i32 [[M]], 16
635+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
636+
; CHECK-NEXT: [[T:%.*]] = add nsw i32 [[TMP1]], [[X]]
637637
; CHECK-NEXT: ret i32 [[T]]
638638
;
639639
%m = mul nsw i32 %x, 65537

0 commit comments

Comments
 (0)