Skip to content

Commit 0693e82

Browse files
committed
AMDGPU/GlobalISel: Do a better job splitting 64-bit G_SEXT_INREG
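We don't need to expand to full 64-bit shifts when the sign-extension width is greater than 32 bits. Instead, the low half is copied through unchanged and the extension is done with a 32-bit sext_inreg of the high half.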
1 parent 05f2a04 commit 0693e82

File tree

2 files changed

+68
-56
lines changed

2 files changed

+68
-56
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 18 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1799,53 +1799,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
17991799
return;
18001800
}
18011801
case AMDGPU::G_SEXT_INREG: {
1802-
const RegisterBank *SrcBank =
1803-
OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
1804-
1805-
// We can directly handle all 64-bit cases with s_bfe_i64.
1806-
if (SrcBank == &AMDGPU::SGPRRegBank)
1807-
break;
1802+
SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
1803+
if (SrcRegs.empty())
1804+
break; // Nothing to repair
18081805

18091806
const LLT S32 = LLT::scalar(32);
1810-
Register DstReg = MI.getOperand(0).getReg();
1811-
Register SrcReg = MI.getOperand(1).getReg();
1812-
LLT Ty = MRI.getType(DstReg);
1813-
if (Ty == S32)
1814-
break;
1815-
18161807
MachineIRBuilder B(MI);
18171808
ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
18181809
GISelObserverWrapper Observer(&O);
18191810
B.setChangeObserver(Observer);
18201811

1812+
// Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
1813+
// we would need to further expand, and doesn't let us directly set the
1814+
// result registers.
1815+
SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
1816+
18211817
int Amt = MI.getOperand(2).getImm();
18221818
if (Amt <= 32) {
1823-
// Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
1824-
// we would need to further expand, and doesn't let us directly set the
1825-
// result registers.
1826-
SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
1827-
SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
1828-
1829-
if (SrcRegs.empty())
1830-
split64BitValueForMapping(B, SrcRegs, S32, SrcReg);
1831-
// Extend in the low bits and propagate the sign bit to the high half.
1832-
auto ShiftAmt = B.buildConstant(S32, 31);
18331819
if (Amt == 32) {
1820+
// The low bits are unchanged.
18341821
B.buildCopy(DstRegs[0], SrcRegs[0]);
1835-
B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
18361822
} else {
1823+
// Extend in the low bits and propagate the sign bit to the high half.
18371824
B.buildSExtInReg(DstRegs[0], SrcRegs[0], Amt);
1838-
B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
18391825
}
1826+
1827+
B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
18401828
} else {
1841-
assert(empty(OpdMapper.getVRegs(0)) && empty(OpdMapper.getVRegs(1)));
1842-
const LLT S64 = LLT::scalar(64);
1843-
// This straddles two registers. Expand with 64-bit shifts.
1844-
auto ShiftAmt = B.buildConstant(S32, 64 - Amt);
1845-
auto Shl = B.buildShl(S64, SrcReg, ShiftAmt);
1846-
B.buildAShr(DstReg, Shl, ShiftAmt);
1829+
// The low bits are unchanged, and extend in the high bits.
1830+
B.buildCopy(DstRegs[0], SrcRegs[0]);
1831+
B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
18471832
}
18481833

1834+
Register DstReg = MI.getOperand(0).getReg();
18491835
MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
18501836
MI.eraseFromParent();
18511837
return;
@@ -2965,7 +2951,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
29652951
}
29662952
case AMDGPU::G_ZEXT:
29672953
case AMDGPU::G_SEXT:
2968-
case AMDGPU::G_ANYEXT: {
2954+
case AMDGPU::G_ANYEXT:
2955+
case AMDGPU::G_SEXT_INREG: {
29692956
Register Dst = MI.getOperand(0).getReg();
29702957
Register Src = MI.getOperand(1).getReg();
29712958
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
@@ -2996,24 +2983,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
29962983
}
29972984
break;
29982985
}
2999-
case AMDGPU::G_SEXT_INREG: {
3000-
Register Dst = MI.getOperand(0).getReg();
3001-
Register Src = MI.getOperand(1).getReg();
3002-
Register Amt = MI.getOperand(2).getImm();
3003-
unsigned Size = getSizeInBits(Dst, MRI, *TRI);
3004-
unsigned BankID = getRegBank(Src, MRI, *TRI)->getID();
3005-
3006-
if (Amt <= 32) {
3007-
OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
3008-
} else {
3009-
// If we need to expand a 64 bit for the VALU, this will straddle two
3010-
// registers. Just expand this with 64-bit shifts.
3011-
OpdsMapping[0] = AMDGPU::getValueMapping(BankID, Size);
3012-
}
3013-
3014-
OpdsMapping[1] = OpdsMapping[0];
3015-
break;
3016-
}
30172986
case AMDGPU::G_FCMP: {
30182987
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
30192988
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@ body: |
121121
; CHECK-LABEL: name: sext_inreg_v_s64_1
122122
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
123123
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
124-
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
125124
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 1
125+
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
126126
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
127127
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
128128
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -143,8 +143,8 @@ body: |
143143
; CHECK-LABEL: name: sext_inreg_v_s64_31
144144
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
145145
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
146-
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
147146
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 31
147+
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
148148
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
149149
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
150150
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -165,8 +165,8 @@ body: |
165165
; CHECK-LABEL: name: sext_inreg_v_s64_32
166166
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
167167
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
168-
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
169168
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
169+
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
170170
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
171171
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
172172
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -186,12 +186,55 @@ body: |
186186
187187
; CHECK-LABEL: name: sext_inreg_v_s64_33
188188
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
189-
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
190-
; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[COPY]], [[C]](s32)
191-
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
192-
; CHECK: S_ENDPGM 0, implicit [[ASHR]](s64)
189+
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
190+
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
191+
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1
192+
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
193+
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
193194
%0:_(s64) = COPY $vgpr0_vgpr1
194195
%1:_(s64) = G_SEXT_INREG %0, 33
195196
S_ENDPGM 0, implicit %1
196197
197198
...
199+
200+
---
201+
name: sext_inreg_v_s64_35
202+
legalized: true
203+
204+
body: |
205+
bb.0:
206+
liveins: $vgpr0_vgpr1
207+
208+
; CHECK-LABEL: name: sext_inreg_v_s64_35
209+
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
210+
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
211+
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
212+
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3
213+
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
214+
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
215+
%0:_(s64) = COPY $vgpr0_vgpr1
216+
%1:_(s64) = G_SEXT_INREG %0, 35
217+
S_ENDPGM 0, implicit %1
218+
219+
...
220+
221+
---
222+
name: sext_inreg_v_s64_63
223+
legalized: true
224+
225+
body: |
226+
bb.0:
227+
liveins: $vgpr0_vgpr1
228+
229+
; CHECK-LABEL: name: sext_inreg_v_s64_63
230+
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
231+
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
232+
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
233+
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31
234+
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
235+
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
236+
%0:_(s64) = COPY $vgpr0_vgpr1
237+
%1:_(s64) = G_SEXT_INREG %0, 63
238+
S_ENDPGM 0, implicit %1
239+
240+
...

0 commit comments

Comments
 (0)