Skip to content

[InstCombine] Refactor matchFunnelShift to allow more pattern (NFC) #68474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 19, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 30 additions & 16 deletions llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -2732,16 +2732,22 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
// rotate matching code under visitSelect and visitTrunc? // rotate matching code under visitSelect and visitTrunc?
unsigned Width = Or.getType()->getScalarSizeInBits(); unsigned Width = Or.getType()->getScalarSizeInBits();


// First, find an or'd pair of opposite shifts: Instruction *Or0, *Or1;
// or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1) if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
BinaryOperator *Or0, *Or1; !match(Or.getOperand(1), m_Instruction(Or1)))
if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
!match(Or.getOperand(1), m_BinOp(Or1)))
return nullptr; return nullptr;


bool IsFshl = true; // Sub on LSHR.
SmallVector<Value *, 3> FShiftArgs;

// First, find an or'd pair of opposite shifts:
// or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the rationale for this check? Isn't it covered by the matching of logical shifts?

But either way, if it is indeed useful, can you invert it and early return to keep nested scopes down.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to match either
or (shl %x, 16) (shr %y, 16)
or (shl %x, 16) (zext %y)

if (isa(Or0) && isa(Or1)) is used to filter in first case and that is the original code matched.
68ab662 filters in second case.
If we want to further reduce nested scopes. We may need to split this function to several static functions to match each case, but this requires to pass some args and may not be concise.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code seems new? Not that its wrong or anything.

But either way don't understand why it can't be an early return. If we don't enter the if FShiftArgs will be null and we will return anyways.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code seems new? Not that its wrong or anything.

I added this if statement in this patch.

It can be early returned in this patch, but I'd like to add another else if on 5b3b1bb. If I removed this if statement, then I need to add the if back and indent the code in the if statement on #68502. This make the diff not clear.

image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay fair enough.

Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1; Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) || if (!match(Or0,
!match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) || m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
!match(Or1,
m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
Or0->getOpcode() == Or1->getOpcode()) Or0->getOpcode() == Or1->getOpcode())
return nullptr; return nullptr;


Expand All @@ -2766,16 +2772,18 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {


Constant *LC, *RC; Constant *LC, *RC;
if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) && if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
match(L, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && match(L,
match(R, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) && m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
match(R,
m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width))) match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowUndef(Width)))
return ConstantExpr::mergeUndefsWith(LC, RC); return ConstantExpr::mergeUndefsWith(LC, RC);


// (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width. // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
// We limit this to X < Width in case the backend re-expands the intrinsic, // We limit this to X < Width in case the backend re-expands the
// and has to reintroduce a shift modulo operation (InstCombine might remove // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
// it after this fold). This still doesn't guarantee that the final codegen // might remove it after this fold). This still doesn't guarantee that the
// will match this original pattern. // final codegen will match this original pattern.
if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) { if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or); KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
return KnownL.getMaxValue().ult(Width) ? L : nullptr; return KnownL.getMaxValue().ult(Width) ? L : nullptr;
Expand Down Expand Up @@ -2803,7 +2811,8 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
// Similar to above, but the shift amount may be extended after masking, // Similar to above, but the shift amount may be extended after masking,
// so return the extended value as the parameter for the intrinsic. // so return the extended value as the parameter for the intrinsic.
if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) && if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))), match(R,
m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
m_SpecificInt(Mask)))) m_SpecificInt(Mask))))
return L; return L;


Expand All @@ -2815,17 +2824,22 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
}; };


Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width); Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
bool IsFshl = true; // Sub on LSHR.
if (!ShAmt) { if (!ShAmt) {
ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width); ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
IsFshl = false; // Sub on SHL. IsFshl = false; // Sub on SHL.
} }
if (!ShAmt) if (!ShAmt)
return nullptr; return nullptr;


FShiftArgs = {ShVal0, ShVal1, ShAmt};
}

if (FShiftArgs.empty())
return nullptr;

Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr; Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType()); Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
return CallInst::Create(F, {ShVal0, ShVal1, ShAmt}); return CallInst::Create(F, FShiftArgs);
} }


/// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns. /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns.
Expand Down