Skip to content

Commit c437f6c

Browse files
committed
AMDGPU/GlobalISel: Split 64-bit G_CTPOP in RegBankSelect
1 parent 6135f5e commit c437f6c

File tree

2 files changed

+86
-10
lines changed

2 files changed

+86
-10
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2084,6 +2084,29 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
20842084
MI.eraseFromParent();
20852085
return;
20862086
}
2087+
case AMDGPU::G_CTPOP: {
  // Narrow a non-32-bit G_CTPOP whose result is not on the SGPR bank down to
  // 32-bit pieces, using the generic LegalizerHelper to do the split.
  MachineIRBuilder B(MI);
  MachineFunction &MF = B.getMF();

  // A result assigned to the SGPR bank is not split here; leave the switch
  // without rewriting the instruction.
  const RegisterBank *DstBank =
      OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  if (DstBank == &AMDGPU::SGPRRegBank)
    break;

  // A 32-bit source needs no narrowing either.
  Register SrcReg = MI.getOperand(1).getReg();
  const LLT S32 = LLT::scalar(32);
  LLT Ty = MRI.getType(SrcReg);
  if (Ty == S32)
    break;

  // The observer is handed the VGPR bank; presumably it assigns that bank to
  // the instructions/registers the helper creates -- confirm against
  // ApplyRegBankMapping.
  ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
  GISelObserverWrapper Observer(&ApplyVALU);
  LegalizerHelper Helper(MF, Observer, B);

  // Narrow on type index 1 (the source operand) to s32. Failure here is a
  // programming error, so name the action that was actually attempted.
  if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized)
    llvm_unreachable("narrowScalar should have succeeded");
  return;
}
20872110
case AMDGPU::G_SEXT:
20882111
case AMDGPU::G_ZEXT: {
20892112
Register SrcReg = MI.getOperand(1).getReg();
@@ -3172,9 +3195,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
31723195
case AMDGPU::G_BITCAST:
31733196
case AMDGPU::G_INTTOPTR:
31743197
case AMDGPU::G_PTRTOINT:
3175-
case AMDGPU::G_CTLZ_ZERO_UNDEF:
3176-
case AMDGPU::G_CTTZ_ZERO_UNDEF:
3177-
case AMDGPU::G_CTPOP:
31783198
case AMDGPU::G_BSWAP:
31793199
case AMDGPU::G_BITREVERSE:
31803200
case AMDGPU::G_FABS:
@@ -3184,6 +3204,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
31843204
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
31853205
break;
31863206
}
3207+
case AMDGPU::G_CTLZ:
case AMDGPU::G_CTLZ_ZERO_UNDEF:
case AMDGPU::G_CTTZ:
case AMDGPU::G_CTTZ_ZERO_UNDEF:
case AMDGPU::G_CTPOP: {
  // Bit-counting operations: both operands use the bank of the source
  // register; the result is mapped as 32 bits while the source keeps its own
  // width.
  const Register SrcReg = MI.getOperand(1).getReg();
  const unsigned SrcBits = MRI.getType(SrcReg).getSizeInBits();
  const unsigned SrcBankID = getRegBankID(SrcReg, MRI, *TRI);

  OpdsMapping[0] = AMDGPU::getValueMapping(SrcBankID, 32);

  // Ideally the source would use getValueMappingSGPR64Only, but letting the
  // generic code perform the register split makes LegalizerHelper harder to
  // use, so the source is mapped at its full width instead.
  OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcBits);
  break;
}
31873222
case AMDGPU::G_TRUNC: {
31883223
Register Dst = MI.getOperand(0).getReg();
31893224
Register Src = MI.getOperand(1).getReg();

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctpop.mir

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,70 @@
33
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
44

55
---
6-
name: ctpop_i32_s
6+
name: ctpop_s32_s
77
legalized: true
88

99
body: |
1010
bb.0:
1111
liveins: $sgpr0
12-
; CHECK-LABEL: name: ctpop_i32_s
12+
; CHECK-LABEL: name: ctpop_s32_s
1313
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
14-
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]]
14+
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s32)
15+
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
1516
%0:_(s32) = COPY $sgpr0
1617
%1:_(s32) = G_CTPOP %0
18+
S_ENDPGM 0, implicit %1
1719
...
1820

1921
---
20-
name: ctpop_i32_v
22+
name: ctpop_s32_v
2123
legalized: true
2224

2325
body: |
2426
bb.0:
25-
liveins: $vgpr0_vgpr1
26-
; CHECK-LABEL: name: ctpop_i32_v
27+
liveins: $vgpr0
28+
; CHECK-LABEL: name: ctpop_s32_v
2729
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
28-
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]]
30+
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[COPY]](s32)
31+
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
2932
%0:_(s32) = COPY $vgpr0
3033
%1:_(s32) = G_CTPOP %0
34+
S_ENDPGM 0, implicit %1
35+
...
36+
37+
---
38+
name: ctpop_s64_s
39+
legalized: true
40+
41+
body: |
42+
bb.0:
43+
liveins: $sgpr0_sgpr1
44+
45+
; CHECK-LABEL: name: ctpop_s64_s
46+
; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
47+
; CHECK: [[CTPOP:%[0-9]+]]:sgpr(s32) = G_CTPOP [[COPY]](s64)
48+
; CHECK: S_ENDPGM 0, implicit [[CTPOP]](s32)
49+
%0:_(s64) = COPY $sgpr0_sgpr1
50+
%1:_(s32) = G_CTPOP %0
51+
S_ENDPGM 0, implicit %1
52+
...
53+
54+
---
55+
name: ctpop_s64_v
56+
legalized: true
57+
58+
body: |
59+
bb.0:
60+
liveins: $vgpr0_vgpr1
61+
62+
; CHECK-LABEL: name: ctpop_s64_v
63+
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
64+
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
65+
; CHECK: [[CTPOP:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV]](s32)
66+
; CHECK: [[CTPOP1:%[0-9]+]]:vgpr(s32) = G_CTPOP [[UV1]](s32)
67+
; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTPOP1]], [[CTPOP]]
68+
; CHECK: S_ENDPGM 0, implicit [[ADD]](s32)
69+
%0:_(s64) = COPY $vgpr0_vgpr1
70+
%1:_(s32) = G_CTPOP %0
71+
S_ENDPGM 0, implicit %1
3172
...

0 commit comments

Comments
 (0)