Skip to content

Commit 8ff3e4f

Browse files
authored
[InstCombine] Refactor matchFunnelShift to allow more patterns (NFC) (#68474)
The current implementation of matchFunnelShift only allows the opposite-shift pattern. Refactor it to allow more patterns.
1 parent f16cb0e commit 8ff3e4f

File tree

1 file changed

+93
-79
lines changed

1 file changed

+93
-79
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 93 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -2740,100 +2740,114 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
27402740
// rotate matching code under visitSelect and visitTrunc?
27412741
unsigned Width = Or.getType()->getScalarSizeInBits();
27422742

2743-
// First, find an or'd pair of opposite shifts:
2744-
// or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
2745-
BinaryOperator *Or0, *Or1;
2746-
if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
2747-
!match(Or.getOperand(1), m_BinOp(Or1)))
2748-
return nullptr;
2749-
2750-
Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
2751-
if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
2752-
!match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
2753-
Or0->getOpcode() == Or1->getOpcode())
2743+
Instruction *Or0, *Or1;
2744+
if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
2745+
!match(Or.getOperand(1), m_Instruction(Or1)))
27542746
return nullptr;
27552747

2756-
// Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
2757-
if (Or0->getOpcode() == BinaryOperator::LShr) {
2758-
std::swap(Or0, Or1);
2759-
std::swap(ShVal0, ShVal1);
2760-
std::swap(ShAmt0, ShAmt1);
2761-
}
2762-
assert(Or0->getOpcode() == BinaryOperator::Shl &&
2763-
Or1->getOpcode() == BinaryOperator::LShr &&
2764-
"Illegal or(shift,shift) pair");
2765-
2766-
// Match the shift amount operands for a funnel shift pattern. This always
2767-
// matches a subtraction on the R operand.
2768-
auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
2769-
// Check for constant shift amounts that sum to the bitwidth.
2770-
const APInt *LI, *RI;
2771-
if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
2772-
if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
2773-
return ConstantInt::get(L->getType(), *LI);
2774-
2775-
Constant *LC, *RC;
2776-
if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
2777-
match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
2778-
match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
2779-
match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
2780-
return ConstantExpr::mergeUndefsWith(LC, RC);
2781-
2782-
// (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
2783-
// We limit this to X < Width in case the backend re-expands the intrinsic,
2784-
// and has to reintroduce a shift modulo operation (InstCombine might remove
2785-
// it after this fold). This still doesn't guarantee that the final codegen
2786-
// will match this original pattern.
2787-
if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
2788-
KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
2789-
return KnownL.getMaxValue().ult(Width) ? L : nullptr;
2790-
}
2748+
bool IsFshl = true; // Sub on LSHR.
2749+
SmallVector<Value *, 3> FShiftArgs;
27912750

2792-
// For non-constant cases, the following patterns currently only work for
2793-
// rotation patterns.
2794-
// TODO: Add general funnel-shift compatible patterns.
2795-
if (ShVal0 != ShVal1)
2751+
// First, find an or'd pair of opposite shifts:
2752+
// or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
2753+
if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
2754+
Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
2755+
if (!match(Or0,
2756+
m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
2757+
!match(Or1,
2758+
m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
2759+
Or0->getOpcode() == Or1->getOpcode())
27962760
return nullptr;
27972761

2798-
// For non-constant cases we don't support non-pow2 shift masks.
2799-
// TODO: Is it worth matching urem as well?
2800-
if (!isPowerOf2_32(Width))
2801-
return nullptr;
2762+
// Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
2763+
if (Or0->getOpcode() == BinaryOperator::LShr) {
2764+
std::swap(Or0, Or1);
2765+
std::swap(ShVal0, ShVal1);
2766+
std::swap(ShAmt0, ShAmt1);
2767+
}
2768+
assert(Or0->getOpcode() == BinaryOperator::Shl &&
2769+
Or1->getOpcode() == BinaryOperator::LShr &&
2770+
"Illegal or(shift,shift) pair");
2771+
2772+
// Match the shift amount operands for a funnel shift pattern. This always
2773+
// matches a subtraction on the R operand.
2774+
auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
2775+
// Check for constant shift amounts that sum to the bitwidth.
2776+
const APInt *LI, *RI;
2777+
if (match(L, m_APIntAllowUndef(LI)) && match(R, m_APIntAllowUndef(RI)))
2778+
if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
2779+
return ConstantInt::get(L->getType(), *LI);
2780+
2781+
Constant *LC, *RC;
2782+
if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
2783+
match(L,
2784+
m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
2785+
match(R,
2786+
m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
2787+
match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
2788+
return ConstantExpr::mergeUndefsWith(LC, RC);
2789+
2790+
// (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
2791+
// We limit this to X < Width in case the backend re-expands the
2792+
// intrinsic, and has to reintroduce a shift modulo operation (InstCombine
2793+
// might remove it after this fold). This still doesn't guarantee that the
2794+
// final codegen will match this original pattern.
2795+
if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
2796+
KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
2797+
return KnownL.getMaxValue().ult(Width) ? L : nullptr;
2798+
}
28022799

2803-
// The shift amount may be masked with negation:
2804-
// (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
2805-
Value *X;
2806-
unsigned Mask = Width - 1;
2807-
if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
2808-
match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
2809-
return X;
2800+
// For non-constant cases, the following patterns currently only work for
2801+
// rotation patterns.
2802+
// TODO: Add general funnel-shift compatible patterns.
2803+
if (ShVal0 != ShVal1)
2804+
return nullptr;
28102805

2811-
// Similar to above, but the shift amount may be extended after masking,
2812-
// so return the extended value as the parameter for the intrinsic.
2813-
if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
2814-
match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
2815-
m_SpecificInt(Mask))))
2816-
return L;
2806+
// For non-constant cases we don't support non-pow2 shift masks.
2807+
// TODO: Is it worth matching urem as well?
2808+
if (!isPowerOf2_32(Width))
2809+
return nullptr;
28172810

2818-
if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
2819-
match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
2820-
return L;
2811+
// The shift amount may be masked with negation:
2812+
// (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
2813+
Value *X;
2814+
unsigned Mask = Width - 1;
2815+
if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
2816+
match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
2817+
return X;
2818+
2819+
// Similar to above, but the shift amount may be extended after masking,
2820+
// so return the extended value as the parameter for the intrinsic.
2821+
if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
2822+
match(R,
2823+
m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
2824+
m_SpecificInt(Mask))))
2825+
return L;
2826+
2827+
if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
2828+
match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
2829+
return L;
28212830

2822-
return nullptr;
2823-
};
2831+
return nullptr;
2832+
};
28242833

2825-
Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
2826-
bool IsFshl = true; // Sub on LSHR.
2827-
if (!ShAmt) {
2828-
ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
2829-
IsFshl = false; // Sub on SHL.
2834+
Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
2835+
if (!ShAmt) {
2836+
ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
2837+
IsFshl = false; // Sub on SHL.
2838+
}
2839+
if (!ShAmt)
2840+
return nullptr;
2841+
2842+
FShiftArgs = {ShVal0, ShVal1, ShAmt};
28302843
}
2831-
if (!ShAmt)
2844+
2845+
if (FShiftArgs.empty())
28322846
return nullptr;
28332847

28342848
Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
28352849
Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
2836-
return CallInst::Create(F, {ShVal0, ShVal1, ShAmt});
2850+
return CallInst::Create(F, FShiftArgs);
28372851
}
28382852

28392853
/// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.

0 commit comments

Comments
 (0)