Skip to content

Commit c821401

Browse files
committed
[InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z)
Proofs: https://alive2.llvm.org/ce/z/N9dRzP https://alive2.llvm.org/ce/z/Xrpc-Y
1 parent 0826b05 commit c821401

File tree

2 files changed

+68
-34
lines changed

2 files changed

+68
-34
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,53 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
12591259
match(Op1, m_SpecificIntAllowPoison(BitWidth - 1)))
12601260
return new ZExtInst(Builder.CreateIsNotNeg(X, "isnotneg"), Ty);
12611261

1262+
// ((X << nuw Z) sub nuw Y) >>u exact Z --> X sub nuw (Y >>u exact Z),
1263+
// but only if the lshr (I) is exact and both the shl and the sub are nuw.
1264+
Value *Y;
1265+
if (I.isExact() &&
1266+
match(Op0, m_OneUse(m_NUWSub(m_NUWShl(m_Value(X), m_Specific(Op1)),
1267+
m_Value(Y))))) {
1268+
Value *NewLshr = Builder.CreateLShr(Y, Op1, "", /* isExact */ true);
1269+
auto *NewSub = BinaryOperator::CreateNUWSub(X, NewLshr);
1270+
NewSub->setHasNoSignedWrap(
1271+
cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap());
1272+
return NewSub;
1273+
}
1274+
1275+
auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
1276+
switch (BinOpcode) {
1277+
default:
1278+
return false;
1279+
case Instruction::Add:
1280+
case Instruction::Or:
1281+
case Instruction::Xor:
1282+
// The `and` opcode is not supported by this fold, and `sub` is handled
// separately by the exact-only fold above.
1283+
return true;
1284+
}
1285+
};
1286+
1287+
// If the shift is nuw and the binop is nuw (required only for opcodes that
// can carry wrap flags, i.e. add; or/xor have no such flag), then:
1288+
// ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z)
1289+
if (match(Op0, m_OneUse(m_c_BinOp(m_NUWShl(m_Value(X), m_Specific(Op1)),
1290+
m_Value(Y))))) {
1291+
BinaryOperator *Op0OB = cast<BinaryOperator>(Op0);
1292+
if (isSuitableBinOpcode(Op0OB->getOpcode())) {
1293+
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op0);
1294+
!OBO || OBO->hasNoUnsignedWrap()) {
1295+
Value *NewLshr = Builder.CreateLShr(Y, Op1, "", I.isExact());
1296+
auto *NewBinOp = BinaryOperator::Create(Op0OB->getOpcode(), NewLshr, X);
1297+
if (OBO) {
1298+
NewBinOp->setHasNoUnsignedWrap(true);
1299+
NewBinOp->setHasNoSignedWrap(OBO->hasNoSignedWrap());
1300+
} else if (auto *Disjoint = dyn_cast<PossiblyDisjointInst>(Op0);
1301+
Disjoint && Disjoint->isDisjoint()) {
1302+
cast<PossiblyDisjointInst>(NewBinOp)->setIsDisjoint(true);
1303+
}
1304+
return NewBinOp;
1305+
}
1306+
}
1307+
}
1308+
12621309
if (match(Op1, m_APInt(C))) {
12631310
unsigned ShAmtC = C->getZExtValue();
12641311
auto *II = dyn_cast<IntrinsicInst>(Op0);
@@ -1275,7 +1322,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
12751322
return new ZExtInst(Cmp, Ty);
12761323
}
12771324

1278-
Value *X;
12791325
const APInt *C1;
12801326
if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
12811327
if (C1->ult(ShAmtC)) {
@@ -1320,7 +1366,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
13201366
// ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C)
13211367
// TODO: Consolidate with the more general transform that starts from shl
13221368
// (the shifts are in the opposite order).
1323-
Value *Y;
13241369
if (match(Op0,
13251370
m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))),
13261371
m_Value(Y))))) {

llvm/test/Transforms/InstCombine/lshr.ll

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,7 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
165165

166166
define <2 x i8> @lshr_exact_splat_vec_nuw(<2 x i8> %x) {
167167
; CHECK-LABEL: @lshr_exact_splat_vec_nuw(
168-
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 1, i8 1>
169-
; CHECK-NEXT: [[LSHR:%.*]] = and <2 x i8> [[TMP1]], <i8 63, i8 63>
168+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], <i8 1, i8 1>
170169
; CHECK-NEXT: ret <2 x i8> [[LSHR]]
171170
;
172171
%shl = shl nuw <2 x i8> %x, <i8 2, i8 2>
@@ -374,9 +373,8 @@ define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
374373

375374
define i32 @shl_add_lshr_flag_preservation(i32 %x, i32 %c, i32 %y) {
376375
; CHECK-LABEL: @shl_add_lshr_flag_preservation(
377-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
378-
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[SHL]], [[Y:%.*]]
379-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[ADD]], [[C]]
376+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[Y:%.*]], [[C:%.*]]
377+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP1]], [[X:%.*]]
380378
; CHECK-NEXT: ret i32 [[LSHR]]
381379
;
382380
%shl = shl nuw i32 %x, %c
@@ -387,9 +385,8 @@ define i32 @shl_add_lshr_flag_preservation(i32 %x, i32 %c, i32 %y) {
387385

388386
define i32 @shl_add_lshr(i32 %x, i32 %c, i32 %y) {
389387
; CHECK-LABEL: @shl_add_lshr(
390-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
391-
; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[SHL]], [[Y:%.*]]
392-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ADD]], [[C]]
388+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
389+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X:%.*]]
393390
; CHECK-NEXT: ret i32 [[LSHR]]
394391
;
395392
%shl = shl nuw i32 %x, %c
@@ -400,9 +397,8 @@ define i32 @shl_add_lshr(i32 %x, i32 %c, i32 %y) {
400397

401398
define i32 @shl_add_lshr_comm(i32 %x, i32 %c, i32 %y) {
402399
; CHECK-LABEL: @shl_add_lshr_comm(
403-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
404-
; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[SHL]], [[Y:%.*]]
405-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ADD]], [[C]]
400+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
401+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X:%.*]]
406402
; CHECK-NEXT: ret i32 [[LSHR]]
407403
;
408404
%shl = shl nuw i32 %x, %c
@@ -443,9 +439,8 @@ define i32 @shl_sub_lshr_no_nuw(i32 %x, i32 %c, i32 %y) {
443439

444440
define i32 @shl_sub_lshr(i32 %x, i32 %c, i32 %y) {
445441
; CHECK-LABEL: @shl_sub_lshr(
446-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
447-
; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 [[SHL]], [[Y:%.*]]
448-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[SUB]], [[C]]
442+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[Y:%.*]], [[C:%.*]]
443+
; CHECK-NEXT: [[LSHR:%.*]] = sub nuw nsw i32 [[X:%.*]], [[TMP1]]
449444
; CHECK-NEXT: ret i32 [[LSHR]]
450445
;
451446
%shl = shl nuw i32 %x, %c
@@ -456,9 +451,8 @@ define i32 @shl_sub_lshr(i32 %x, i32 %c, i32 %y) {
456451

457452
define i32 @shl_or_lshr(i32 %x, i32 %c, i32 %y) {
458453
; CHECK-LABEL: @shl_or_lshr(
459-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
460-
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[Y:%.*]]
461-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[OR]], [[C]]
454+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
455+
; CHECK-NEXT: [[LSHR:%.*]] = or i32 [[TMP1]], [[X:%.*]]
462456
; CHECK-NEXT: ret i32 [[LSHR]]
463457
;
464458
%shl = shl nuw i32 %x, %c
@@ -469,9 +463,8 @@ define i32 @shl_or_lshr(i32 %x, i32 %c, i32 %y) {
469463

470464
define i32 @shl_or_disjoint_lshr(i32 %x, i32 %c, i32 %y) {
471465
; CHECK-LABEL: @shl_or_disjoint_lshr(
472-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
473-
; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[SHL]], [[Y:%.*]]
474-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[OR]], [[C]]
466+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
467+
; CHECK-NEXT: [[LSHR:%.*]] = or disjoint i32 [[TMP1]], [[X:%.*]]
475468
; CHECK-NEXT: ret i32 [[LSHR]]
476469
;
477470
%shl = shl nuw i32 %x, %c
@@ -482,9 +475,8 @@ define i32 @shl_or_disjoint_lshr(i32 %x, i32 %c, i32 %y) {
482475

483476
define i32 @shl_or_lshr_comm(i32 %x, i32 %c, i32 %y) {
484477
; CHECK-LABEL: @shl_or_lshr_comm(
485-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
486-
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[Y:%.*]]
487-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[OR]], [[C]]
478+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
479+
; CHECK-NEXT: [[LSHR:%.*]] = or i32 [[TMP1]], [[X:%.*]]
488480
; CHECK-NEXT: ret i32 [[LSHR]]
489481
;
490482
%shl = shl nuw i32 %x, %c
@@ -495,9 +487,8 @@ define i32 @shl_or_lshr_comm(i32 %x, i32 %c, i32 %y) {
495487

496488
define i32 @shl_or_disjoint_lshr_comm(i32 %x, i32 %c, i32 %y) {
497489
; CHECK-LABEL: @shl_or_disjoint_lshr_comm(
498-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
499-
; CHECK-NEXT: [[OR:%.*]] = or disjoint i32 [[SHL]], [[Y:%.*]]
500-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[OR]], [[C]]
490+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
491+
; CHECK-NEXT: [[LSHR:%.*]] = or disjoint i32 [[TMP1]], [[X:%.*]]
501492
; CHECK-NEXT: ret i32 [[LSHR]]
502493
;
503494
%shl = shl nuw i32 %x, %c
@@ -508,9 +499,8 @@ define i32 @shl_or_disjoint_lshr_comm(i32 %x, i32 %c, i32 %y) {
508499

509500
define i32 @shl_xor_lshr(i32 %x, i32 %c, i32 %y) {
510501
; CHECK-LABEL: @shl_xor_lshr(
511-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
512-
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], [[Y:%.*]]
513-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[XOR]], [[C]]
502+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
503+
; CHECK-NEXT: [[LSHR:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
514504
; CHECK-NEXT: ret i32 [[LSHR]]
515505
;
516506
%shl = shl nuw i32 %x, %c
@@ -522,9 +512,8 @@ define i32 @shl_xor_lshr(i32 %x, i32 %c, i32 %y) {
522512

523513
define i32 @shl_xor_lshr_comm(i32 %x, i32 %c, i32 %y) {
524514
; CHECK-LABEL: @shl_xor_lshr_comm(
525-
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
526-
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], [[Y:%.*]]
527-
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[XOR]], [[C]]
515+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
516+
; CHECK-NEXT: [[LSHR:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
528517
; CHECK-NEXT: ret i32 [[LSHR]]
529518
;
530519
%shl = shl nuw i32 %x, %c

0 commit comments

Comments
 (0)