Skip to content

[GlobalISel] Fold G_CTTZ if possible #86224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions llvm/include/llvm/CodeGen/GlobalISel/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,10 +308,12 @@ std::optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
Register Src,
const MachineRegisterInfo &MRI);

/// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector
/// then it tries to do an element-wise constant fold.
/// Tries to constant fold a counting-zero operation (G_CTLZ or G_CTTZ) on \p
/// Src. If \p Src is a vector then it tries to do an element-wise constant
/// fold.
std::optional<SmallVector<unsigned>>
ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI);
ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
std::function<unsigned(APInt)> CB);

/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit is
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,16 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
return buildFConstant(DstOps[0], *Cst);
break;
}
case TargetOpcode::G_CTLZ: {
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTTZ: {
Comment on lines +259 to +260
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably also handle the _ZERO_UNDEF variants (G_CTLZ_ZERO_UNDEF and G_CTTZ_ZERO_UNDEF).

assert(SrcOps.size() == 1 && "Expected one source");
assert(DstOps.size() == 1 && "Expected one dest");
auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
std::function<unsigned(APInt)> CB;
if (Opc == TargetOpcode::G_CTLZ)
CB = [](APInt V) -> unsigned { return V.countl_zero(); };
else
CB = [](APInt V) -> unsigned { return V.countTrailingZeros(); };
auto MaybeCsts = ConstantFoldCountZeros(SrcOps[0].getReg(), *getMRI(), CB);
if (!MaybeCsts)
break;
if (MaybeCsts->size() == 1)
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -966,14 +966,15 @@ llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
}

std::optional<SmallVector<unsigned>>
llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
llvm::ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
std::function<unsigned(APInt)> CB) {
LLT Ty = MRI.getType(Src);
SmallVector<unsigned> FoldedCTLZs;
auto tryFoldScalar = [&](Register R) -> std::optional<unsigned> {
auto MaybeCst = getIConstantVRegVal(R, MRI);
if (!MaybeCst)
return std::nullopt;
return MaybeCst->countl_zero();
return CB(*MaybeCst);
};
if (Ty.isVector()) {
// Try to constant fold each element.
Expand Down
42 changes: 42 additions & 0 deletions llvm/unittests/CodeGen/GlobalISel/CSETest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,4 +233,46 @@ TEST_F(AArch64GISelMITest, TestConstantFoldCTL) {
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
}

/// Verifies that building G_CTTZ through a CSEMIRBuilder on constant inputs
/// yields the folded result instead of a G_CTTZ instruction: a single
/// G_CONSTANT for a scalar source, and a G_BUILD_VECTOR of per-element
/// constants for a vector source built from constants.
TEST_F(AArch64GISelMITest, TestConstantFoldCTT) {
  setUp();
  // Nothing to test when no target machine is available.
  if (!TM)
    GTEST_SKIP();

  LLT s32 = LLT::scalar(32);

  GISelCSEInfo CSEInfo;
  CSEInfo.setCSEConfig(std::make_unique<CSEConfigConstantOnly>());
  CSEInfo.analyze(*MF);
  B.setCSEInfo(&CSEInfo);
  CSEMIRBuilder CSEB(B.getState());

  // Scalar case: cttz(8) == 3.
  auto Cst8 = CSEB.buildConstant(s32, 8);
  auto *CttzDef = &*CSEB.buildCTTZ(s32, Cst8);
  // Fatal check: the next line reads operand 1 as a constant immediate, which
  // is only meaningful when the returned instruction is a G_CONSTANT.
  ASSERT_TRUE(CttzDef->getOpcode() == TargetOpcode::G_CONSTANT);
  EXPECT_EQ(CttzDef->getOperand(1).getCImm()->getZExtValue(), 3u);

  // Vector case: cttz(<8, 16, 32, 64>) == <3, 4, 5, 6>.
  auto Cst16 = CSEB.buildConstant(s32, 16);
  auto Cst32 = CSEB.buildConstant(s32, 32);
  auto Cst64 = CSEB.buildConstant(s32, 64);
  LLT VecTy = LLT::fixed_vector(4, s32);
  auto BV = CSEB.buildBuildVector(VecTy, {Cst8.getReg(0), Cst16.getReg(0),
                                          Cst32.getReg(0), Cst64.getReg(0)});
  CSEB.buildCTTZ(VecTy, BV);

  // The check variables are named after the constant value they bind. The
  // scalar result constant (3) is expected to be reused as the first element
  // of the folded vector.
  auto CheckStr = R"(
  ; CHECK: [[CST8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; CHECK: [[CST3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
  ; CHECK: [[CST16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; CHECK: [[CST32:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
  ; CHECK: [[CST64:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
  ; CHECK: [[BV1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[CST8]]:_(s32), [[CST16]]:_(s32), [[CST32]]:_(s32), [[CST64]]:_(s32)
  ; CHECK: [[CST4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; CHECK: [[CST5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; CHECK: [[CST6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
  ; CHECK: [[BV2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[CST3]]:_(s32), [[CST4]]:_(s32), [[CST5]]:_(s32), [[CST6]]:_(s32)
  )";

  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
}

} // namespace