Skip to content

Commit 0aa439d

Browse files
committed
AMDGPU/GlobalISel: Use SGPR results for G_AMDGPU_WAVE_ADDRESS
1 parent ff56b74 commit 0aa439d

File tree

4 files changed

+22
-17
lines changed

4 files changed

+22
-17
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4377,8 +4377,12 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
43774377
Register Reg = Root.getReg();
43784378
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
43794379

4380-
const MachineInstr *Def = MRI->getVRegDef(Reg);
4381-
if (Register WaveBase = getWaveAddress(Def)) {
4380+
std::optional<DefinitionAndSourceRegister> Def =
4381+
getDefSrcRegIgnoringCopies(Reg, *MRI);
4382+
assert(Def && "this shouldn't be an optional result");
4383+
Reg = Def->Reg;
4384+
4385+
if (Register WaveBase = getWaveAddress(Def->MI)) {
43824386
return {{
43834387
[=](MachineInstrBuilder &MIB) { // rsrc
43844388
MIB.addReg(Info->getScratchRSrcReg());
@@ -4394,10 +4398,12 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
43944398

43954399
// FIXME: Copy check is a hack
43964400
Register BasePtr;
4397-
if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset))))) {
4401+
if (mi_match(Reg, *MRI,
4402+
m_GPtrAdd(m_Reg(BasePtr),
4403+
m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) {
43984404
if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
43994405
return {};
4400-
const MachineInstr *BasePtrDef = MRI->getVRegDef(BasePtr);
4406+
MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI);
44014407
Register WaveBase = getWaveAddress(BasePtrDef);
44024408
if (!WaveBase)
44034409
return {};

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3817,9 +3817,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
38173817
// This case is weird because we expect a physical register in the source,
38183818
// but need to set a bank anyway.
38193819
//
3820-
// We could select the result to SGPR or VGPR, but for the one current use
3821-
// it's more practical to always use VGPR.
3822-
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
3820+
// TODO: We could select the result to SGPR or VGPR
3821+
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
38233822
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
38243823
break;
38253824
}

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -781,16 +781,14 @@ body: |
781781
; GFX6: liveins: $vgpr0, $vgpr1
782782
; GFX6-NEXT: {{ $}}
783783
; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
784-
; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
785-
; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
786-
; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[V_MOV_B32_e32_]], 0, implicit $exec
787-
; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
784+
; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5)
785+
;
788786
; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095
789787
; GFX9: liveins: $vgpr0, $vgpr1
790788
; GFX9-NEXT: {{ $}}
791789
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
792-
; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
793-
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5)
790+
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5)
791+
;
794792
; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095
795793
; GFX11: liveins: $vgpr0, $vgpr1
796794
; GFX11-NEXT: {{ $}}
@@ -830,6 +828,7 @@ body: |
830828
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
831829
; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec
832830
; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
831+
;
833832
; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096
834833
; GFX9: liveins: $vgpr0, $vgpr1
835834
; GFX9-NEXT: {{ $}}

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ legalized: true
99
body: |
1010
bb.0:
1111
; CHECK-LABEL: name: amdgpu_wave_address
12-
; CHECK: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
12+
; CHECK: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
1313
; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_WAVE_ADDRESS]](p5)
1414
%0:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
1515
S_ENDPGM 0, implicit %0
@@ -23,9 +23,10 @@ body: |
2323
bb.0:
2424
; CHECK-LABEL: name: amdgpu_wave_address_v
2525
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
26-
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
27-
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
28-
; CHECK-NEXT: G_STORE [[AMDGPU_WAVE_ADDRESS]](p5), [[COPY]](p1) :: (store (p5), addrspace 1)
26+
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
27+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY [[AMDGPU_WAVE_ADDRESS]](p5)
28+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
29+
; CHECK-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p1) :: (store (p5), addrspace 1)
2930
%0:_(p1) = G_IMPLICIT_DEF
3031
%1:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
3132
G_STORE %1, %0 :: (store (p5), addrspace 1)

0 commit comments

Comments
 (0)