Skip to content

Commit bdaa82a

Browse files
authored
[AMDGPU] Mark AGPR tuple implicit in the first instr of AGPR spills. (#115285)
When AGPRs are spilled to the stack through VGPRs, PEI (the prologepilog pass) only marks the AGPR tuple as implicit-def on the first spill instruction. To preserve liveness, it should also mark the tuple as implicit (an implicit use) on that instruction. Fixes: SWDEV-462189
1 parent e582865 commit bdaa82a

File tree

2 files changed

+19
-11
lines changed

2 files changed

+19
-11
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,6 +1817,8 @@ void SIRegisterInfo::buildSpillLoadStore(
18171817
.addReg(SubReg, getKillRegState(IsKill));
18181818
if (NeedSuperRegDef)
18191819
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1820+
if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1821+
AccRead.addReg(ValueReg, RegState::Implicit);
18201822
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
18211823
}
18221824
SubReg = TmpIntermediateVGPR;

llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 %s -o - -run-pass prologepilog,machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX908-PEI-MACHINECP %s
44

55
# When VGPRs are available for spilling, prologepilog marks the tuple implicit-def as well as implicit in the first spill instruction.
6-
# As a consequence, machine-cp would NOT delete agpr2 copy here.
76

87
---
98
name: agpr-spill-to-vgpr-machine-cp
109
tracksRegLiveness: true
1110
stack:
1211
- { id: 0, name: '', type: spill-slot, offset: 0, size: 128, alignment: 4 }
1312
machineFunctionInfo:
13+
isEntryFunction: true
1414
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
1515
stackPtrOffsetReg: '$sgpr32'
1616
hasSpilledVGPRs: true
@@ -43,15 +43,16 @@ body: |
4343
S_ENDPGM 0
4444
...
4545

46-
# When VGPRs are NOT available for spilling (stack is used), prologepilog marks the tuple implicit-def only and NOT implicit.
47-
# As a consequence, machine-cp would delete agpr2 copy here. Presently, this is incorrect behavior.
46+
# When VGPRs are NOT available for spilling (stack is used), prologepilog should also mark the tuple both implicit-def and implicit (similar to the use case above).
47+
# As a consequence, machine-cp would not delete the agpr2 copy here.
4848

4949
---
5050
name: agpr-spill-to-vgpr-to-stack-machine-cp
5151
tracksRegLiveness: true
5252
stack:
5353
- { id: 0, name: '', type: spill-slot, offset: 0, size: 128, alignment: 4 }
5454
machineFunctionInfo:
55+
isEntryFunction: true
5556
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
5657
stackPtrOffsetReg: '$sgpr32'
5758
hasSpilledVGPRs: true
@@ -60,29 +61,34 @@ body: |
6061
successors:
6162
liveins: $vgpr0, $vgpr1
6263
; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp
63-
; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
64+
; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3
6465
; GFX908-PEI-NEXT: {{ $}}
66+
; GFX908-PEI-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
67+
; GFX908-PEI-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
6568
; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
6669
; GFX908-PEI-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
6770
; GFX908-PEI-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
6871
; GFX908-PEI-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
69-
; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
70-
; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
72+
; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
73+
; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
7174
; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
72-
; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
75+
; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
7376
; GFX908-PEI-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
7477
; GFX908-PEI-NEXT: S_ENDPGM 0
7578
;
7679
; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp
77-
; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
80+
; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3
7881
; GFX908-PEI-MACHINECP-NEXT: {{ $}}
82+
; GFX908-PEI-MACHINECP-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
83+
; GFX908-PEI-MACHINECP-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
7984
; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
85+
; GFX908-PEI-MACHINECP-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
8086
; GFX908-PEI-MACHINECP-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
8187
; GFX908-PEI-MACHINECP-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
82-
; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
83-
; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
88+
; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
89+
; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
8490
; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
85-
; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
91+
; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
8692
; GFX908-PEI-MACHINECP-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
8793
; GFX908-PEI-MACHINECP-NEXT: S_ENDPGM 0
8894
renamable $agpr0 = COPY renamable $vgpr0, implicit $exec

0 commit comments

Comments
 (0)