Skip to content

Commit 312a9d1

Browse files
committed
GlobalISel: Fix narrowScalar for G_{CTLZ|CTTZ}_ZERO_UNDEF
On AMDGPU, narrow these operations to 32 bits when the 64-bit form is assigned to the VALU.
1 parent c437f6c commit 312a9d1

File tree

4 files changed

+114
-18
lines changed

4 files changed

+114
-18
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,8 +1028,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
10281028
if (TypeIdx == 1)
10291029
switch (MI.getOpcode()) {
10301030
case TargetOpcode::G_CTLZ:
1031+
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
10311032
return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
10321033
case TargetOpcode::G_CTTZ:
1034+
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
10331035
return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
10341036
case TargetOpcode::G_CTPOP:
10351037
return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
@@ -3985,13 +3987,17 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
39853987
unsigned NarrowSize = NarrowTy.getSizeInBits();
39863988

39873989
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
3990+
const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
3991+
39883992
MachineIRBuilder &B = MIRBuilder;
39893993
auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
39903994
// ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
39913995
auto C_0 = B.buildConstant(NarrowTy, 0);
39923996
auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
39933997
UnmergeSrc.getReg(1), C_0);
3994-
auto LoCTLZ = B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
3998+
auto LoCTLZ = IsUndef ?
3999+
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
4000+
B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
39954001
auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
39964002
auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
39974003
auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
@@ -4017,13 +4023,17 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
40174023
unsigned NarrowSize = NarrowTy.getSizeInBits();
40184024

40194025
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4026+
const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
4027+
40204028
MachineIRBuilder &B = MIRBuilder;
40214029
auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
40224030
// cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
40234031
auto C_0 = B.buildConstant(NarrowTy, 0);
40244032
auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
40254033
UnmergeSrc.getReg(0), C_0);
4026-
auto HiCTTZ = B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
4034+
auto HiCTTZ = IsUndef ?
4035+
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
4036+
B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
40274037
auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
40284038
auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
40294039
auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2084,7 +2084,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
20842084
MI.eraseFromParent();
20852085
return;
20862086
}
2087-
case AMDGPU::G_CTPOP: {
2087+
case AMDGPU::G_CTPOP:
2088+
case AMDGPU::G_CTLZ_ZERO_UNDEF:
2089+
case AMDGPU::G_CTTZ_ZERO_UNDEF: {
20882090
MachineIRBuilder B(MI);
20892091
MachineFunction &MF = B.getMF();
20902092

@@ -2104,7 +2106,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
21042106
LegalizerHelper Helper(MF, Observer, B);
21052107

21062108
if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
2107-
llvm_unreachable("widenScalar should have succeeded");
2109+
llvm_unreachable("narrowScalar should have succeeded");
21082110
return;
21092111
}
21102112
case AMDGPU::G_SEXT:
@@ -3204,9 +3206,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
32043206
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
32053207
break;
32063208
}
3207-
case AMDGPU::G_CTLZ:
32083209
case AMDGPU::G_CTLZ_ZERO_UNDEF:
3209-
case AMDGPU::G_CTTZ:
32103210
case AMDGPU::G_CTTZ_ZERO_UNDEF:
32113211
case AMDGPU::G_CTPOP: {
32123212
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,72 @@
33
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
44

55
---
6-
name: ctlz_zero_undef_i32_s
6+
name: ctlz_zero_undef_s32_s
77
legalized: true
88

99
body: |
1010
bb.0:
1111
liveins: $sgpr0
12-
; CHECK-LABEL: name: ctlz_zero_undef_i32_s
12+
; CHECK-LABEL: name: ctlz_zero_undef_s32_s
1313
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
14-
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]]
14+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32)
15+
; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32)
1516
%0:_(s32) = COPY $sgpr0
1617
%1:_(s32) = G_CTLZ_ZERO_UNDEF %0
18+
S_ENDPGM 0, implicit %1
1719
...
1820

1921
---
20-
name: ctlz_zero_undef_i32_v
22+
name: ctlz_zero_undef_s32_v
2123
legalized: true
2224

2325
body: |
2426
bb.0:
2527
liveins: $vgpr0_vgpr1
26-
; CHECK-LABEL: name: ctlz_zero_undef_i32_v
28+
; CHECK-LABEL: name: ctlz_zero_undef_s32_v
2729
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
28-
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]]
30+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32)
31+
; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32)
2932
%0:_(s32) = COPY $vgpr0
3033
%1:_(s32) = G_CTLZ_ZERO_UNDEF %0
34+
S_ENDPGM 0, implicit %1
35+
...
36+
37+
---
38+
name: ctlz_zero_undef_s64_s
39+
legalized: true
40+
41+
body: |
42+
bb.0:
43+
liveins: $sgpr0_sgpr1
44+
; CHECK-LABEL: name: ctlz_zero_undef_s64_s
45+
; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
46+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64)
47+
; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32)
48+
%0:_(s64) = COPY $sgpr0_sgpr1
49+
%1:_(s32) = G_CTLZ_ZERO_UNDEF %0
50+
S_ENDPGM 0, implicit %1
51+
...
52+
53+
---
54+
name: ctlz_zero_undef_s64_v
55+
legalized: true
56+
57+
body: |
58+
bb.0:
59+
liveins: $vgpr0_vgpr1
60+
; CHECK-LABEL: name: ctlz_zero_undef_s64_v
61+
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
62+
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
63+
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
64+
; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]]
65+
; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s32)
66+
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32
67+
; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTLZ_ZERO_UNDEF]], [[C1]]
68+
; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
69+
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[CTLZ_ZERO_UNDEF1]]
70+
; CHECK: S_ENDPGM 0, implicit [[SELECT]](s32)
71+
%0:_(s64) = COPY $vgpr0_vgpr1
72+
%1:_(s32) = G_CTLZ_ZERO_UNDEF %0
73+
S_ENDPGM 0, implicit %1
3174
...

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,72 @@
33
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
44

55
---
6-
name: cttz_zero_undef_i32_s
6+
name: cttz_zero_undef_s32_s
77
legalized: true
88

99
body: |
1010
bb.0:
1111
liveins: $sgpr0
12-
; CHECK-LABEL: name: cttz_zero_undef_i32_s
12+
; CHECK-LABEL: name: cttz_zero_undef_s32_s
1313
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
14-
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]]
14+
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32)
15+
; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32)
1516
%0:_(s32) = COPY $sgpr0
1617
%1:_(s32) = G_CTTZ_ZERO_UNDEF %0
18+
S_ENDPGM 0, implicit %1
1719
...
1820

1921
---
20-
name: cttz_zero_undef_i32_v
22+
name: cttz_zero_undef_s32_v
2123
legalized: true
2224

2325
body: |
2426
bb.0:
2527
liveins: $vgpr0_vgpr1
26-
; CHECK-LABEL: name: cttz_zero_undef_i32_v
28+
; CHECK-LABEL: name: cttz_zero_undef_s32_v
2729
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
28-
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]]
30+
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32)
31+
; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32)
2932
%0:_(s32) = COPY $vgpr0
3033
%1:_(s32) = G_CTTZ_ZERO_UNDEF %0
34+
S_ENDPGM 0, implicit %1
35+
...
36+
37+
---
38+
name: cttz_zero_undef_s64_s
39+
legalized: true
40+
41+
body: |
42+
bb.0:
43+
liveins: $sgpr0_sgpr1
44+
; CHECK-LABEL: name: cttz_zero_undef_s64_s
45+
; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
46+
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64)
47+
; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32)
48+
%0:_(s64) = COPY $sgpr0_sgpr1
49+
%1:_(s32) = G_CTTZ_ZERO_UNDEF %0
50+
S_ENDPGM 0, implicit %1
51+
...
52+
53+
---
54+
name: cttz_zero_undef_s64_v
55+
legalized: true
56+
57+
body: |
58+
bb.0:
59+
liveins: $vgpr0_vgpr1
60+
; CHECK-LABEL: name: cttz_zero_undef_s64_v
61+
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
62+
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
63+
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
64+
; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]]
65+
; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32)
66+
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32
67+
; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTTZ_ZERO_UNDEF]], [[C1]]
68+
; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32)
69+
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[CTTZ_ZERO_UNDEF1]]
70+
; CHECK: S_ENDPGM 0, implicit [[SELECT]](s32)
71+
%0:_(s64) = COPY $vgpr0_vgpr1
72+
%1:_(s32) = G_CTTZ_ZERO_UNDEF %0
73+
S_ENDPGM 0, implicit %1
3174
...

0 commit comments

Comments
 (0)