Skip to content

Commit edd80be

Browse files
committed
Reorder (shl (add/sub (shl x, C0), y), C1) -> (add/sub (shl x, C0 + C1), (shl y, C1))
This is just expanding the existing pattern that exists for AND/XOR/OR, and it gets a bit more parallelism from the instruction sequence. Alive2 proofs: Add - https://alive2.llvm.org/ce/z/dSmPkV ; Sub1 - https://alive2.llvm.org/ce/z/6rpi5V ; Sub2 - https://alive2.llvm.org/ce/z/UfYeUd . Reviewed By: spatel. Differential Revision: https://reviews.llvm.org/D141875
1 parent 8c88465 commit edd80be

File tree

2 files changed

+54
-37
lines changed

2 files changed

+54
-37
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -322,22 +322,33 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
322322
return BinaryOperator::Create(Instruction::And, NewShift, NewMask);
323323
}
324324

325-
/// If we have a shift-by-constant of a bitwise logic op that itself has a
326-
/// shift-by-constant operand with identical opcode, we may be able to convert
327-
/// that into 2 independent shifts followed by the logic op. This eliminates a
328-
/// a use of an intermediate value (reduces dependency chain).
329-
static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
325+
/// If we have a shift-by-constant of a bin op (bitwise logic op or add/sub w/
326+
/// shl) that itself has a shift-by-constant operand with identical opcode, we
327+
/// may be able to convert that into 2 independent shifts followed by the logic
328+
/// op. This eliminates a use of an intermediate value (reduces dependency
329+
/// chain).
330+
static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
330331
InstCombiner::BuilderTy &Builder) {
331332
assert(I.isShift() && "Expected a shift as input");
332-
auto *LogicInst = dyn_cast<BinaryOperator>(I.getOperand(0));
333-
if (!LogicInst || !LogicInst->isBitwiseLogicOp() || !LogicInst->hasOneUse())
333+
auto *BinInst = dyn_cast<BinaryOperator>(I.getOperand(0));
334+
if (!BinInst ||
335+
(!BinInst->isBitwiseLogicOp() &&
336+
BinInst->getOpcode() != Instruction::Add &&
337+
BinInst->getOpcode() != Instruction::Sub) ||
338+
!BinInst->hasOneUse())
334339
return nullptr;
335340

336341
Constant *C0, *C1;
337342
if (!match(I.getOperand(1), m_Constant(C1)))
338343
return nullptr;
339344

340345
Instruction::BinaryOps ShiftOpcode = I.getOpcode();
346+
// Transform for add/sub only works with shl.
347+
if ((BinInst->getOpcode() == Instruction::Add ||
348+
BinInst->getOpcode() == Instruction::Sub) &&
349+
ShiftOpcode != Instruction::Shl)
350+
return nullptr;
351+
341352
Type *Ty = I.getType();
342353

343354
// Find a matching one-use shift by constant. The fold is not valid if the sum
@@ -352,19 +363,25 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
352363
m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
353364
};
354365

355-
// Logic ops are commutative, so check each operand for a match.
356-
if (matchFirstShift(LogicInst->getOperand(0)))
357-
Y = LogicInst->getOperand(1);
358-
else if (matchFirstShift(LogicInst->getOperand(1)))
359-
Y = LogicInst->getOperand(0);
360-
else
366+
// Logic ops and Add are commutative, so check each operand for a match. Sub
367+
// is not, so we cannot reorder if we match operand(1) and need to keep the
368+
// operands in their original positions.
369+
bool FirstShiftIsOp1 = false;
370+
if (matchFirstShift(BinInst->getOperand(0)))
371+
Y = BinInst->getOperand(1);
372+
else if (matchFirstShift(BinInst->getOperand(1))) {
373+
Y = BinInst->getOperand(0);
374+
FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
375+
} else
361376
return nullptr;
362377

363-
// shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
378+
// shift (binop (shift X, C0), Y), C1 -> binop (shift X, C0+C1), (shift Y, C1)
364379
Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1);
365380
Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC);
366381
Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, C1);
367-
return BinaryOperator::Create(LogicInst->getOpcode(), NewShift1, NewShift2);
382+
Value *Op1 = FirstShiftIsOp1 ? NewShift2 : NewShift1;
383+
Value *Op2 = FirstShiftIsOp1 ? NewShift1 : NewShift2;
384+
return BinaryOperator::Create(BinInst->getOpcode(), Op1, Op2);
368385
}
369386

370387
Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
@@ -463,7 +480,7 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
463480
return replaceOperand(I, 1, Rem);
464481
}
465482

466-
if (Instruction *Logic = foldShiftOfShiftedLogic(I, Builder))
483+
if (Instruction *Logic = foldShiftOfShiftedBinOp(I, Builder))
467484
return Logic;
468485

469486
return nullptr;

llvm/test/Transforms/InstCombine/shift-logic.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -335,9 +335,9 @@ define i64 @lshr_mul_negative_nsw(i64 %0) {
335335

336336
define i8 @shl_add(i8 %x, i8 %y) {
337337
; CHECK-LABEL: @shl_add(
338-
; CHECK-NEXT: [[SH0:%.*]] = shl i8 [[X:%.*]], 3
339-
; CHECK-NEXT: [[R:%.*]] = add i8 [[SH0]], [[Y:%.*]]
340-
; CHECK-NEXT: [[SH1:%.*]] = shl i8 [[R]], 2
338+
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
339+
; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[Y:%.*]], 2
340+
; CHECK-NEXT: [[SH1:%.*]] = add i8 [[TMP1]], [[TMP2]]
341341
; CHECK-NEXT: ret i8 [[SH1]]
342342
;
343343
%sh0 = shl i8 %x, 3
@@ -348,9 +348,9 @@ define i8 @shl_add(i8 %x, i8 %y) {
348348

349349
define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
350350
; CHECK-LABEL: @shl_add_nonuniform(
351-
; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
352-
; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[SH0]], [[Y:%.*]]
353-
; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i8> [[R]], <i8 2, i8 0>
351+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
352+
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 2, i8 0>
353+
; CHECK-NEXT: [[SH1:%.*]] = add <2 x i8> [[TMP1]], [[TMP2]]
354354
; CHECK-NEXT: ret <2 x i8> [[SH1]]
355355
;
356356
%sh0 = shl <2 x i8> %x, <i8 3, i8 4>
@@ -363,9 +363,9 @@ define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
363363
define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) {
364364
; CHECK-LABEL: @shl_add_undef(
365365
; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
366-
; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], <i64 5, i64 undef>
367-
; CHECK-NEXT: [[R:%.*]] = add <2 x i64> [[Y]], [[SH0]]
368-
; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i64> [[R]], <i64 7, i64 undef>
366+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 undef>
367+
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[Y]], <i64 7, i64 undef>
368+
; CHECK-NEXT: [[SH1:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
369369
; CHECK-NEXT: ret <2 x i64> [[SH1]]
370370
;
371371
%y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization
@@ -419,9 +419,9 @@ define <2 x i64> @lshr_add_undef(<2 x i64> %x, <2 x i64> %py) {
419419

420420
define i8 @shl_sub(i8 %x, i8 %y) {
421421
; CHECK-LABEL: @shl_sub(
422-
; CHECK-NEXT: [[SH0:%.*]] = shl i8 [[X:%.*]], 3
423-
; CHECK-NEXT: [[R:%.*]] = sub i8 [[SH0]], [[Y:%.*]]
424-
; CHECK-NEXT: [[SH1:%.*]] = shl i8 [[R]], 2
422+
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
423+
; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[Y:%.*]], 2
424+
; CHECK-NEXT: [[SH1:%.*]] = sub i8 [[TMP1]], [[TMP2]]
425425
; CHECK-NEXT: ret i8 [[SH1]]
426426
;
427427
%sh0 = shl i8 %x, 3
@@ -433,9 +433,9 @@ define i8 @shl_sub(i8 %x, i8 %y) {
433433
; Make sure we don't commute operands for sub
434434
define i8 @shl_sub_no_commute(i8 %x, i8 %y) {
435435
; CHECK-LABEL: @shl_sub_no_commute(
436-
; CHECK-NEXT: [[SH0:%.*]] = shl i8 [[Y:%.*]], 3
437-
; CHECK-NEXT: [[R:%.*]] = sub i8 [[X:%.*]], [[SH0]]
438-
; CHECK-NEXT: [[SH1:%.*]] = shl i8 [[R]], 2
436+
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[Y:%.*]], 5
437+
; CHECK-NEXT: [[TMP2:%.*]] = shl i8 [[X:%.*]], 2
438+
; CHECK-NEXT: [[SH1:%.*]] = sub i8 [[TMP2]], [[TMP1]]
439439
; CHECK-NEXT: ret i8 [[SH1]]
440440
;
441441
%sh0 = shl i8 %y, 3
@@ -446,9 +446,9 @@ define i8 @shl_sub_no_commute(i8 %x, i8 %y) {
446446

447447
define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
448448
; CHECK-LABEL: @shl_sub_nonuniform(
449-
; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
450-
; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> [[SH0]], [[Y:%.*]]
451-
; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i8> [[R]], <i8 2, i8 0>
449+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
450+
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 2, i8 0>
451+
; CHECK-NEXT: [[SH1:%.*]] = sub <2 x i8> [[TMP1]], [[TMP2]]
452452
; CHECK-NEXT: ret <2 x i8> [[SH1]]
453453
;
454454
%sh0 = shl <2 x i8> %x, <i8 3, i8 4>
@@ -461,9 +461,9 @@ define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
461461
define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) {
462462
; CHECK-LABEL: @shl_sub_undef(
463463
; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
464-
; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], <i64 5, i64 undef>
465-
; CHECK-NEXT: [[R:%.*]] = sub <2 x i64> [[Y]], [[SH0]]
466-
; CHECK-NEXT: [[SH1:%.*]] = shl <2 x i64> [[R]], <i64 7, i64 undef>
464+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 undef>
465+
; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[Y]], <i64 7, i64 undef>
466+
; CHECK-NEXT: [[SH1:%.*]] = sub <2 x i64> [[TMP2]], [[TMP1]]
467467
; CHECK-NEXT: ret <2 x i64> [[SH1]]
468468
;
469469
%y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization

0 commit comments

Comments
 (0)