Skip to content

Commit f03c9f7

Browse files
pravinjagtap and SyamaAmd
authored and committed
[AMDGPU] Mark AGPR tuple implicit in the first instr of AGPR spills. (llvm#115285)
When AGPRs are spilled to the stack through VGPRs, PEI (the prologepilog pass) marks the AGPR tuple only as implicit-def. To preserve liveness, it should also mark the tuple as an implicit use. Fixes: SWDEV-462189 Change-Id: Icdbfe93d09ae7f71bbacc74bd778bfa738381a7b
1 parent aa52d41 commit f03c9f7

File tree

2 files changed

+27
-67
lines changed

2 files changed

+27
-67
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,6 +1702,8 @@ void SIRegisterInfo::buildSpillLoadStore(
17021702
.addReg(SubReg, getKillRegState(IsKill));
17031703
if (NeedSuperRegDef)
17041704
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1705+
if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1706+
AccRead.addReg(ValueReg, RegState::Implicit);
17051707
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
17061708
}
17071709
SubReg = TmpIntermediateVGPR;

llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir

Lines changed: 25 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 %s -o - -run-pass prologepilog,machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX908-PEI-MACHINECP %s
44

55
# When VGPRs are available for spilling, prologepilog marks the tuple implicit-def as well as implicit in the first spill instruction.
6-
# As a consequence, machine-cp would NOT delete agpr2 copy here.
76

87
---
98
name: agpr-spill-to-vgpr-machine-cp
109
tracksRegLiveness: true
1110
stack:
1211
- { id: 0, name: '', type: spill-slot, offset: 0, size: 128, alignment: 4 }
1312
machineFunctionInfo:
13+
isEntryFunction: true
1414
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
1515
stackPtrOffsetReg: '$sgpr32'
1616
hasSpilledVGPRs: true
@@ -21,14 +21,8 @@ body: |
2121
; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-machine-cp
2222
; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33
2323
; GFX908-PEI-NEXT: {{ $}}
24-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6
25-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
26-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16
27-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32_lo16
28-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33_lo16
29-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16
30-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16
31-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16
24+
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02
25+
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg
3226
; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
3327
; GFX908-PEI-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
3428
; GFX908-PEI-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
@@ -39,14 +33,8 @@ body: |
3933
; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-machine-cp
4034
; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33
4135
; GFX908-PEI-MACHINECP-NEXT: {{ $}}
42-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6
43-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
44-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr31_lo16
45-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr32_lo16
46-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr33_lo16
47-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16
48-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr1_lo16
49-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16
36+
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02
37+
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg
5038
; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
5139
; GFX908-PEI-MACHINECP-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
5240
; GFX908-PEI-MACHINECP-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
@@ -59,15 +47,16 @@ body: |
5947
S_ENDPGM 0
6048
...
6149

62-
# When VGPRs are NOT available for spilling (stack is used), prologepilog marks the tuple implicit-def only and NOT implicit.
63-
# As a consequence, machine-cp would delete agpr2 copy here. Presently, this is incorrect behavior.
50+
# When VGPRs are NOT available for spilling (stack is used), prologepilog should also mark the tuple implicit-def and implicit (similar to the use case above).
51+
# As a consequence, machine-cp would not delete agpr2 copy here.
6452

6553
---
6654
name: agpr-spill-to-vgpr-to-stack-machine-cp
6755
tracksRegLiveness: true
6856
stack:
6957
- { id: 0, name: '', type: spill-slot, offset: 0, size: 128, alignment: 4 }
7058
machineFunctionInfo:
59+
isEntryFunction: true
7160
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
7261
stackPtrOffsetReg: '$sgpr32'
7362
hasSpilledVGPRs: true
@@ -76,69 +65,38 @@ body: |
7665
successors:
7766
liveins: $vgpr0, $vgpr1
7867
; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp
79-
; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
68+
; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3
8069
; GFX908-PEI-NEXT: {{ $}}
81-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6
82-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
83-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16
84-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16
85-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16
86-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16
87-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16
88-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16
89-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16
90-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16
91-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16
92-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16
93-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16
94-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16
95-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16
96-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16
97-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16
98-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16
99-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16
100-
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16
70+
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02
71+
; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg
72+
; GFX908-PEI-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
73+
; GFX908-PEI-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
10174
; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
10275
; GFX908-PEI-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
10376
; GFX908-PEI-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
10477
; GFX908-PEI-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
105-
; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
106-
; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
78+
; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
79+
; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
10780
; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
108-
; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
81+
; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
10982
; GFX908-PEI-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
11083
; GFX908-PEI-NEXT: S_ENDPGM 0
11184
;
11285
; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp
113-
; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55
86+
; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3
11487
; GFX908-PEI-MACHINECP-NEXT: {{ $}}
115-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION llvm_def_aspace_cfa $sgpr32_lo16, 0, 6
116-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION llvm_register_pair $pc_reg, $sgpr30_lo16, 32, $sgpr31_lo16, 32
117-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr2_lo16
118-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr3_lo16
119-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr4_lo16
120-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr5_lo16
121-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr6_lo16
122-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr7_lo16
123-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr8_lo16
124-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr9_lo16
125-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr10_lo16
126-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr11_lo16
127-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr12_lo16
128-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr13_lo16
129-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr14_lo16
130-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr15_lo16
131-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr16_lo16
132-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $vgpr17_lo16
133-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr0_lo16
134-
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $agpr2_lo16
88+
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02
89+
; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg
90+
; GFX908-PEI-MACHINECP-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
91+
; GFX908-PEI-MACHINECP-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
13592
; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
93+
; GFX908-PEI-MACHINECP-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec
13694
; GFX908-PEI-MACHINECP-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
13795
; GFX908-PEI-MACHINECP-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
138-
; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
139-
; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
96+
; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
97+
; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
14098
; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
141-
; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
99+
; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
142100
; GFX908-PEI-MACHINECP-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
143101
; GFX908-PEI-MACHINECP-NEXT: S_ENDPGM 0
144102
renamable $agpr0 = COPY renamable $vgpr0, implicit $exec

0 commit comments

Comments
 (0)