Skip to content

Commit 6135f5e

Browse files
committed
GlobalISel: Fix narrowing of G_CTLZ/G_CTTZ
The result type is separate from the source type.
1 parent 2126c70 commit 6135f5e

File tree

5 files changed: 109 additions and 29 deletions.

5 files changed: 109 additions and 29 deletions.

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3978,23 +3978,24 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
39783978
if (TypeIdx != 1)
39793979
return UnableToLegalize;
39803980

3981-
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3981+
Register DstReg = MI.getOperand(0).getReg();
3982+
Register SrcReg = MI.getOperand(1).getReg();
3983+
LLT DstTy = MRI.getType(DstReg);
3984+
LLT SrcTy = MRI.getType(SrcReg);
39823985
unsigned NarrowSize = NarrowTy.getSizeInBits();
39833986

39843987
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
39853988
MachineIRBuilder &B = MIRBuilder;
3986-
auto UnmergeSrc = B.buildUnmerge(NarrowTy, MI.getOperand(1));
3989+
auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
39873990
// ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
39883991
auto C_0 = B.buildConstant(NarrowTy, 0);
39893992
auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
39903993
UnmergeSrc.getReg(1), C_0);
3991-
auto LoCTLZ = B.buildCTLZ(NarrowTy, UnmergeSrc.getReg(0));
3992-
auto C_NarrowSize = B.buildConstant(NarrowTy, NarrowSize);
3993-
auto HiIsZeroCTLZ = B.buildAdd(NarrowTy, LoCTLZ, C_NarrowSize);
3994-
auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(NarrowTy, UnmergeSrc.getReg(1));
3995-
auto LoOut = B.buildSelect(NarrowTy, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
3996-
3997-
B.buildMerge(MI.getOperand(0), {LoOut, C_0});
3994+
auto LoCTLZ = B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
3995+
auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
3996+
auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
3997+
auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
3998+
B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
39983999

39994000
MI.eraseFromParent();
40004001
return Legalized;
@@ -4009,23 +4010,24 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
40094010
if (TypeIdx != 1)
40104011
return UnableToLegalize;
40114012

4012-
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
4013+
Register DstReg = MI.getOperand(0).getReg();
4014+
Register SrcReg = MI.getOperand(1).getReg();
4015+
LLT DstTy = MRI.getType(DstReg);
4016+
LLT SrcTy = MRI.getType(SrcReg);
40134017
unsigned NarrowSize = NarrowTy.getSizeInBits();
40144018

40154019
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
40164020
MachineIRBuilder &B = MIRBuilder;
4017-
auto UnmergeSrc = B.buildUnmerge(NarrowTy, MI.getOperand(1));
4021+
auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
40184022
// cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
40194023
auto C_0 = B.buildConstant(NarrowTy, 0);
40204024
auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
40214025
UnmergeSrc.getReg(0), C_0);
4022-
auto HiCTTZ = B.buildCTTZ(NarrowTy, UnmergeSrc.getReg(1));
4023-
auto C_NarrowSize = B.buildConstant(NarrowTy, NarrowSize);
4024-
auto LoIsZeroCTTZ = B.buildAdd(NarrowTy, HiCTTZ, C_NarrowSize);
4025-
auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(NarrowTy, UnmergeSrc.getReg(0));
4026-
auto LoOut = B.buildSelect(NarrowTy, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
4027-
4028-
B.buildMerge(MI.getOperand(0), {LoOut, C_0});
4026+
auto HiCTTZ = B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
4027+
auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4028+
auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
4029+
auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
4030+
B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
40294031

40304032
MI.eraseFromParent();
40314033
return Legalized;

llvm/lib/Target/Mips/MipsLegalizerInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -204,12 +204,14 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
204204

205205
getActionDefinitionsBuilder(G_CTLZ)
206206
.legalFor({{s32, s32}})
207+
.maxScalar(0, s32)
207208
.maxScalar(1, s32);
208209
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
209210
.lowerFor({{s32, s32}});
210211

211212
getActionDefinitionsBuilder(G_CTTZ)
212213
.lowerFor({{s32, s32}})
214+
.maxScalar(0, s32)
213215
.maxScalar(1, s32);
214216
getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
215217
.lowerFor({{s32, s32}, {s64, s64}});

llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,10 @@ body: |
4242
; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
4343
; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
4444
; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[CTLZ1]]
45-
; MIPS32: $v0 = COPY [[SELECT]](s32)
46-
; MIPS32: $v1 = COPY [[C]](s32)
45+
; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C]](s32)
46+
; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
47+
; MIPS32: $v0 = COPY [[UV]](s32)
48+
; MIPS32: $v1 = COPY [[UV1]](s32)
4749
; MIPS32: RetRA implicit $v0, implicit $v1
4850
%1:_(s32) = COPY $a0
4951
%2:_(s32) = COPY $a1

llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,10 @@ body: |
5757
; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
5858
; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
5959
; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]]
60-
; MIPS32: $v0 = COPY [[SELECT]](s32)
61-
; MIPS32: $v1 = COPY [[C]](s32)
60+
; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C]](s32)
61+
; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
62+
; MIPS32: $v0 = COPY [[UV]](s32)
63+
; MIPS32: $v1 = COPY [[UV1]](s32)
6264
; MIPS32: RetRA implicit $v0, implicit $v1
6365
%1:_(s32) = COPY $a0
6466
%2:_(s32) = COPY $a1
@@ -140,23 +142,25 @@ body: |
140142
; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
141143
; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
142144
; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]]
143-
; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C]]
145+
; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C1]](s32)
146+
; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV1]](s64)
147+
; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[C]]
144148
; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[C]]
145-
; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[C1]]
149+
; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]]
146150
; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
147151
; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
148152
; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[AND3]]
149-
; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32)
153+
; MIPS32: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32)
150154
; MIPS32: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]]
151155
; MIPS32: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]]
152156
; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR2]], [[XOR3]]
153157
; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C1]]
154158
; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
155159
; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
156-
; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV1]]
157-
; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64)
158-
; MIPS32: $v0 = COPY [[UV]](s32)
159-
; MIPS32: $v1 = COPY [[UV1]](s32)
160+
; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV2]]
161+
; MIPS32: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64)
162+
; MIPS32: $v0 = COPY [[UV2]](s32)
163+
; MIPS32: $v1 = COPY [[UV3]](s32)
160164
; MIPS32: RetRA implicit $v0, implicit $v1
161165
%1:_(s32) = COPY $a0
162166
%2:_(s32) = COPY $a1

llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp

Lines changed: 70 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,76 @@ TEST_F(GISelMITest, LowerBitCountingCTTZ1) {
9090
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
9191
}
9292

93+
// CTLZ scalar narrowing
94+
TEST_F(GISelMITest, NarrowScalarCTLZ) {
95+
setUp();
96+
if (!TM)
97+
return;
98+
99+
// Declare your legalization info
100+
DefineLegalizerInfo(A, {
101+
getActionDefinitionsBuilder(G_CTLZ).legalFor({{s32, s32}});
102+
});
103+
// Build Instr
104+
auto CTLZ =
105+
B.buildInstr(TargetOpcode::G_CTLZ, {LLT::scalar(32)}, {Copies[0]});
106+
AInfo Info(MF->getSubtarget());
107+
DummyGISelObserver Observer;
108+
LegalizerHelper Helper(*MF, Info, Observer, B);
109+
// Perform Legalization
110+
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
111+
Helper.narrowScalar(*CTLZ, 1, LLT::scalar(32)));
112+
113+
auto CheckStr = R"(
114+
CHECK: [[UNMERGE_LO:%[0-9]+]]:_(s32), [[UNMERGE_HI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %0:_(s64)
115+
CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
116+
CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UNMERGE_HI]]:_(s32), [[ZERO]]:_
117+
CHECK: [[CTLZ_LO:%[0-9]+]]:_(s32) = G_CTLZ [[UNMERGE_LO]]:_(s32)
118+
CHECK: [[THIRTYTWO:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
119+
CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ_LO]]:_, [[THIRTYTWO]]:_
120+
CHECK: [[CTLZ_HI:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UNMERGE_HI]]:_(s32)
121+
CHECK: %{{[0-9]+}}:_(s32) = G_SELECT [[CMP]]:_(s1), [[ADD]]:_, [[CTLZ_HI]]:_
122+
)";
123+
124+
// Check
125+
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
126+
}
127+
128+
// CTTZ scalar narrowing
129+
TEST_F(GISelMITest, NarrowScalarCTTZ) {
130+
setUp();
131+
if (!TM)
132+
return;
133+
134+
// Declare your legalization info
135+
DefineLegalizerInfo(A, {
136+
getActionDefinitionsBuilder(G_CTTZ).legalFor({{s32, s64}});
137+
});
138+
// Build Instr
139+
auto CTTZ =
140+
B.buildInstr(TargetOpcode::G_CTTZ, {LLT::scalar(32)}, {Copies[0]});
141+
AInfo Info(MF->getSubtarget());
142+
DummyGISelObserver Observer;
143+
LegalizerHelper Helper(*MF, Info, Observer, B);
144+
// Perform Legalization
145+
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
146+
Helper.narrowScalar(*CTTZ, 1, LLT::scalar(32)));
147+
148+
auto CheckStr = R"(
149+
CHECK: [[UNMERGE_LO:%[0-9]+]]:_(s32), [[UNMERGE_HI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %0:_(s64)
150+
CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
151+
CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UNMERGE_LO]]:_(s32), [[ZERO]]:_
152+
CHECK: [[CTTZ_HI:%[0-9]+]]:_(s32) = G_CTTZ [[UNMERGE_HI]]:_(s32)
153+
CHECK: [[THIRTYTWO:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
154+
CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTTZ_HI]]:_, [[THIRTYTWO]]:_
155+
CHECK: [[CTTZ_LO:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UNMERGE_LO]]:_(s32)
156+
CHECK: %{{[0-9]+}}:_(s32) = G_SELECT [[CMP]]:_(s1), [[ADD]]:_, [[CTTZ_LO]]:_
157+
)";
158+
159+
// Check
160+
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
161+
}
162+
93163
// CTTZ expansion in terms of CTPOP
94164
TEST_F(GISelMITest, LowerBitCountingCTTZ2) {
95165
setUp();

0 commit comments

Comments
 (0)