Skip to content

Commit 0a42994

Browse files
authored
[GlobalISel] Fold G_CTTZ if possible (#86224)
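This patch constant-folds `G_CTTZ` in `CSEMIRBuilder` when its source is a constant (element-wise for a vector of constants), by generalizing the existing `G_CTLZ` fold into `ConstantFoldCountZeros`.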
1 parent 0fe0ef4 commit 0a42994

File tree

4 files changed

+58
-7
lines changed

4 files changed

+58
-7
lines changed

llvm/include/llvm/CodeGen/GlobalISel/Utils.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,12 @@ std::optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
308308
Register Src,
309309
const MachineRegisterInfo &MRI);
310310

311-
/// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector
312-
/// then it tries to do an element-wise constant fold.
311+
/// Tries to constant fold a counting-zero operation (G_CTLZ or G_CTTZ) on \p
312+
/// Src. If \p Src is a vector then it tries to do an element-wise constant
313+
/// fold. \p CB computes the zero count for each constant value being folded.
313314
std::optional<SmallVector<unsigned>>
314-
ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI);
315+
ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
316+
std::function<unsigned(APInt)> CB);
315317

316318
/// Test if the given value is known to have exactly one bit set. This differs
317319
/// from computeKnownBits in that it doesn't necessarily determine which bit is

llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,16 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
256256
return buildFConstant(DstOps[0], *Cst);
257257
break;
258258
}
259-
case TargetOpcode::G_CTLZ: {
259+
case TargetOpcode::G_CTLZ:
260+
case TargetOpcode::G_CTTZ: {
260261
assert(SrcOps.size() == 1 && "Expected one source");
261262
assert(DstOps.size() == 1 && "Expected one dest");
262-
auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
263+
std::function<unsigned(APInt)> CB;
264+
if (Opc == TargetOpcode::G_CTLZ)
265+
CB = [](APInt V) -> unsigned { return V.countl_zero(); };
266+
else
267+
CB = [](APInt V) -> unsigned { return V.countTrailingZeros(); };
268+
auto MaybeCsts = ConstantFoldCountZeros(SrcOps[0].getReg(), *getMRI(), CB);
263269
if (!MaybeCsts)
264270
break;
265271
if (MaybeCsts->size() == 1)

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -966,14 +966,15 @@ llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src,
966966
}
967967

968968
std::optional<SmallVector<unsigned>>
969-
llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
969+
llvm::ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
970+
std::function<unsigned(APInt)> CB) {
970971
LLT Ty = MRI.getType(Src);
971972
SmallVector<unsigned> FoldedCTLZs;
972973
auto tryFoldScalar = [&](Register R) -> std::optional<unsigned> {
973974
auto MaybeCst = getIConstantVRegVal(R, MRI);
974975
if (!MaybeCst)
975976
return std::nullopt;
976-
return MaybeCst->countl_zero();
977+
return CB(*MaybeCst);
977978
};
978979
if (Ty.isVector()) {
979980
// Try to constant fold each element.

llvm/unittests/CodeGen/GlobalISel/CSETest.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,4 +233,46 @@ TEST_F(AArch64GISelMITest, TestConstantFoldCTL) {
233233
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
234234
}
235235

236+
TEST_F(AArch64GISelMITest, TestConstantFoldCTT) {
237+
setUp();
238+
if (!TM)
239+
GTEST_SKIP();
240+
241+
LLT s32 = LLT::scalar(32);
242+
243+
GISelCSEInfo CSEInfo;
244+
CSEInfo.setCSEConfig(std::make_unique<CSEConfigConstantOnly>());
245+
CSEInfo.analyze(*MF);
246+
B.setCSEInfo(&CSEInfo);
247+
CSEMIRBuilder CSEB(B.getState());
248+
auto Cst8 = CSEB.buildConstant(s32, 8);
249+
auto *CttzDef = &*CSEB.buildCTTZ(s32, Cst8);
250+
EXPECT_TRUE(CttzDef->getOpcode() == TargetOpcode::G_CONSTANT);
251+
EXPECT_TRUE(CttzDef->getOperand(1).getCImm()->getZExtValue() == 3);
252+
253+
// Test element-wise folding of G_CTTZ on a G_BUILD_VECTOR of constants.
254+
auto Cst16 = CSEB.buildConstant(s32, 16);
255+
auto Cst32 = CSEB.buildConstant(s32, 32);
256+
auto Cst64 = CSEB.buildConstant(s32, 64);
257+
LLT VecTy = LLT::fixed_vector(4, s32);
258+
auto BV = CSEB.buildBuildVector(VecTy, {Cst8.getReg(0), Cst16.getReg(0),
259+
Cst32.getReg(0), Cst64.getReg(0)});
260+
CSEB.buildCTTZ(VecTy, BV);
261+
262+
auto CheckStr = R"(
263+
; CHECK: [[CST8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
264+
; CHECK: [[CST3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
265+
; CHECK: [[CST16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
266+
; CHECK: [[CST32:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
267+
; CHECK: [[CST64:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
268+
; CHECK: [[BV1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[CST8]]:_(s32), [[CST16]]:_(s32), [[CST32]]:_(s32), [[CST64]]:_(s32)
269+
; CHECK: [[CST27:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
270+
; CHECK: [[CST26:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
271+
; CHECK: [[CST25:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
272+
; CHECK: [[BV2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[CST3]]:_(s32), [[CST27]]:_(s32), [[CST26]]:_(s32), [[CST25]]:_(s32)
273+
)";
274+
275+
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
276+
}
277+
236278
} // namespace

0 commit comments

Comments
 (0)