Commit 5b3b1bb

[InstCombine] Refactor matchFunnelShift to allow more patterns (NFC)
The current implementation of matchFunnelShift only matches the opposite-shift pattern. Refactor it so that more patterns can be matched.
1 parent 6afceba commit 5b3b1bb
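
For context, matchFunnelShift recognizes an or of opposite shifts and rewrites it as a funnel-shift intrinsic. A minimal sketch of the fold in LLVM IR (the function name and constants are illustrative, not taken from the commit):

define i32 @rotl5(i32 %x) {
  %shl = shl i32 %x, 5
  %shr = lshr i32 %x, 27
  %or = or i32 %shl, %shr   ; constant shift amounts 5 + 27 == bit width 32
  ret i32 %or
}

; After the fold, the or of shifts becomes a single intrinsic call:
;   %or = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 5)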

File tree

1 file changed: +93 -79 lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 93 additions & 79 deletions
@@ -2732,100 +2732,114 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();
 
-  // First, find an or'd pair of opposite shifts:
-  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
-  BinaryOperator *Or0, *Or1;
-  if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
-      !match(Or.getOperand(1), m_BinOp(Or1)))
-    return nullptr;
-
-  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
-  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
-      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
-      Or0->getOpcode() == Or1->getOpcode())
+  Instruction *Or0, *Or1;
+  if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
+      !match(Or.getOperand(1), m_Instruction(Or1)))
     return nullptr;
 
-  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
-  if (Or0->getOpcode() == BinaryOperator::LShr) {
-    std::swap(Or0, Or1);
-    std::swap(ShVal0, ShVal1);
-    std::swap(ShAmt0, ShAmt1);
-  }
-  assert(Or0->getOpcode() == BinaryOperator::Shl &&
-         Or1->getOpcode() == BinaryOperator::LShr &&
-         "Illegal or(shift,shift) pair");
-
-  // Match the shift amount operands for a funnel shift pattern. This always
-  // matches a subtraction on the R operand.
-  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
-    // Check for constant shift amounts that sum to the bitwidth.
-    const APInt *LI, *RI;
-    if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
-      if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
-        return ConstantInt::get(L->getType(), *LI);
-
-    Constant *LC, *RC;
-    if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
-        match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
-        match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
-      return ConstantExpr::mergeUndefsWith(LC, RC);
-
-    // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
-    // We limit this to X < Width in case the backend re-expands the intrinsic,
-    // and has to reintroduce a shift modulo operation (InstCombine might remove
-    // it after this fold). This still doesn't guarantee that the final codegen
-    // will match this original pattern.
-    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
-      KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
-      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
-    }
+  bool IsFshl = true; // Sub on LSHR.
+  SmallVector<Value *, 3> FShiftArgs;
 
-    // For non-constant cases, the following patterns currently only work for
-    // rotation patterns.
-    // TODO: Add general funnel-shift compatible patterns.
-    if (ShVal0 != ShVal1)
+  // First, find an or'd pair of opposite shifts:
+  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
+  if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
+    Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
+    if (!match(Or0,
+               m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
+        !match(Or1,
+               m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
+        Or0->getOpcode() == Or1->getOpcode())
       return nullptr;
 
-    // For non-constant cases we don't support non-pow2 shift masks.
-    // TODO: Is it worth matching urem as well?
-    if (!isPowerOf2_32(Width))
-      return nullptr;
+    // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
+    if (Or0->getOpcode() == BinaryOperator::LShr) {
+      std::swap(Or0, Or1);
+      std::swap(ShVal0, ShVal1);
+      std::swap(ShAmt0, ShAmt1);
+    }
+    assert(Or0->getOpcode() == BinaryOperator::Shl &&
+           Or1->getOpcode() == BinaryOperator::LShr &&
+           "Illegal or(shift,shift) pair");
+
+    // Match the shift amount operands for a funnel shift pattern. This always
+    // matches a subtraction on the R operand.
+    auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
+      // Check for constant shift amounts that sum to the bitwidth.
+      const APInt *LI, *RI;
+      if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
+        if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
+          return ConstantInt::get(L->getType(), *LI);
+
+      Constant *LC, *RC;
+      if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
+          match(L,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(R,
+                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
+          match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
+        return ConstantExpr::mergeUndefsWith(LC, RC);
+
+      // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
+      // We limit this to X < Width in case the backend re-expands the
+      // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
+      // might remove it after this fold). This still doesn't guarantee that the
+      // final codegen will match this original pattern.
+      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+        KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+        return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+      }
 
-    // The shift amount may be masked with negation:
-    // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
-    Value *X;
-    unsigned Mask = Width - 1;
-    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
-        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
-      return X;
+      // For non-constant cases, the following patterns currently only work for
+      // rotation patterns.
+      // TODO: Add general funnel-shift compatible patterns.
+      if (ShVal0 != ShVal1)
+        return nullptr;
 
-    // Similar to above, but the shift amount may be extended after masking,
-    // so return the extended value as the parameter for the intrinsic.
-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
-                       m_SpecificInt(Mask))))
-      return L;
+      // For non-constant cases we don't support non-pow2 shift masks.
+      // TODO: Is it worth matching urem as well?
+      if (!isPowerOf2_32(Width))
+        return nullptr;
 
-    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
-      return L;
+      // The shift amount may be masked with negation:
+      // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+      Value *X;
+      unsigned Mask = Width - 1;
+      if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+          match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+        return X;
+
+      // Similar to above, but the shift amount may be extended after masking,
+      // so return the extended value as the parameter for the intrinsic.
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R,
+                m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+                      m_SpecificInt(Mask))))
+        return L;
+
+      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+          match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
+        return L;
 
-    return nullptr;
-  };
+      return nullptr;
+    };
 
-  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
-  bool IsFshl = true; // Sub on LSHR.
-  if (!ShAmt) {
-    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
-    IsFshl = false; // Sub on SHL.
+    Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+    if (!ShAmt) {
+      ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+      IsFshl = false; // Sub on SHL.
+    }
+    if (!ShAmt)
+      return nullptr;
+
+    FShiftArgs = {ShVal0, ShVal1, ShAmt};
   }
-  if (!ShAmt)
+
+  if (FShiftArgs.empty())
     return nullptr;
 
   Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
   Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
-  return CallInst::Create(F, {ShVal0, ShVal1, ShAmt});
+  return CallInst::Create(F, FShiftArgs);
 }
 
 /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
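
As a usage note, the matchShiftAmount lambda carried over by this refactor also handles variable rotate amounts masked with negation. A minimal sketch in LLVM IR (names are illustrative, not from the commit):

define i32 @rotl_var(i32 %x, i32 %n) {
  %lamt = and i32 %n, 31        ; L = X & (Width - 1)
  %neg = sub i32 0, %n
  %ramt = and i32 %neg, 31      ; R = (-X) & (Width - 1)
  %shl = shl i32 %x, %lamt
  %shr = lshr i32 %x, %ramt
  %or = or i32 %shl, %shr
  ret i32 %or
}

; matchShiftAmount returns %n here, so the whole expression folds to:
;   %or = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
; which is sound because the funnel-shift intrinsic takes its shift
; amount modulo the bit width.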
