Skip to content

Commit dd89249

Browse files
committed
[AMDGPU] Annotate vgpr<->agpr spills in asm
Differential Revision: https://reviews.llvm.org/D92125
1 parent bf8683a commit dd89249

File tree

3 files changed: 53 additions and 53 deletions

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -697,8 +697,10 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
697697
unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
698698
: AMDGPU::V_ACCVGPR_READ_B32;
699699

700-
return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
701-
.addReg(Src, getKillRegState(IsKill));
700+
auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
701+
.addReg(Src, getKillRegState(IsKill));
702+
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
703+
return MIB;
702704
}
703705

704706
// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
@@ -871,10 +873,12 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
871873
RS->setRegUsed(TmpReg);
872874
}
873875
if (IsStore) {
874-
auto AccRead = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
876+
auto AccRead = BuildMI(*MBB, MI, DL,
877+
TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
875878
.addReg(SubReg, getKillRegState(IsKill));
876879
if (NeedSuperRegDef)
877880
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
881+
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
878882
}
879883
SubReg = TmpReg;
880884
}
@@ -908,10 +912,12 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
908912
if (!IsAGPR && NeedSuperRegDef)
909913
MIB.addReg(ValueReg, RegState::ImplicitDefine);
910914

911-
if (!IsStore && TmpReg != AMDGPU::NoRegister)
915+
if (!IsStore && TmpReg != AMDGPU::NoRegister) {
912916
MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
913917
FinalReg)
914918
.addReg(TmpReg, RegState::Kill);
919+
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
920+
}
915921
} else {
916922
if (NeedSuperRegDef)
917923
MIB.addReg(ValueReg, RegState::ImplicitDefine);

llvm/test/CodeGen/AMDGPU/spill-agpr.ll

Lines changed: 17 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,10 @@
55
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
66
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
77
; A2V-NOT: SCRATCH_RSRC
8-
; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a0
8+
; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a0 ; Reload Reuse
99
; A2M: buffer_store_dword v[[VSPILL]], off, s[{{[0-9:]+}}], 0 offset:[[FI:[0-9]+]] ; 4-byte Folded Spill
1010
; A2M: buffer_load_dword v[[VSPILL:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI]] ; 4-byte Folded Reload
11-
; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
11+
; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; Reload Reuse
1212
; A2V: ScratchSize: 0
1313
define amdgpu_kernel void @max_24regs_32a_used(<16 x float> addrspace(1)* %arg, float addrspace(1)* %out) #0 {
1414
bb:
@@ -34,10 +34,10 @@ bb:
3434
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
3535
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
3636
; A2V-NOT: SCRATCH_RSRC
37-
; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a{{[0-9]+}}
37+
; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a{{[0-9]+}} ; Reload Reuse
3838
; A2M: buffer_store_dword v[[VSPILL]], off, s[{{[0-9:]+}}], 0 offset:[[FI:[0-9]+]] ; 4-byte Folded Spill
3939
; A2M: buffer_load_dword v[[VSPILL:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI]] ; 4-byte Folded Reload
40-
; A2V: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
40+
; A2V: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; Reload Reuse
4141
; A2V: ScratchSize: 0
4242
define amdgpu_kernel void @max_12regs_13a_used(i32 %cond, <4 x float> addrspace(1)* %arg, <4 x float> addrspace(1)* %out) #2 {
4343
bb:
@@ -55,8 +55,7 @@ use:
5555
st:
5656
%gep1 = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i64 16
5757
%gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i64 32
58-
store <4 x float> %mai.1, <4 x float> addrspace(1)* %gep1
59-
store <4 x float> %mai.2, <4 x float> addrspace(1)* %gep2
58+
call void asm sideeffect "", "a,a"(<4 x float> %mai.1, <4 x float> %mai.2)
6059
ret void
6160
}
6261

@@ -65,36 +64,31 @@ st:
6564
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
6665
; A2V-NOT: SCRATCH_RSRC
6766

68-
; A2V: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a{{[0-9]+}}
69-
; A2V: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
67+
; A2V: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a{{[0-9]+}} ; Reload Reuse
68+
; A2V: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; Reload Reuse
7069
; A2V: ScratchSize: 0
7170

7271
; A2M: buffer_store_dword v[[VSPILLSTORE:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI:[0-9]+]] ; 4-byte Folded Spill
7372
; A2M: buffer_load_dword v[[VSPILL_RELOAD:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI]] ; 4-byte Folded Reload
74-
; A2M: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL_RELOAD]]
73+
; A2M: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL_RELOAD]] ; Reload Reuse
7574
define amdgpu_kernel void @max_10_vgprs_used_9a() #1 {
76-
%v0 = load volatile i32, i32 addrspace(3)* undef
77-
%v1 = load volatile i32, i32 addrspace(3)* undef
78-
%v2 = load volatile i32, i32 addrspace(3)* undef
79-
%v3 = load volatile i32, i32 addrspace(3)* undef
80-
%v4 = load volatile i32, i32 addrspace(3)* undef
81-
%v5 = load volatile i32, i32 addrspace(3)* undef
82-
%v6 = load volatile i32, i32 addrspace(3)* undef
83-
%v7 = load volatile i32, i32 addrspace(3)* undef
84-
call void asm sideeffect "", "a,a,a,a,~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6}"(i32 %v0, i32 %v1, i32 %v2, i32 %v3)
85-
%v8 = load volatile i32, i32 addrspace(3)* undef
86-
call void asm sideeffect "", "a,a,a,a,a"(i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8)
75+
%a1 = call <4 x i32> asm sideeffect "", "=a"()
76+
%a2 = call <4 x i32> asm sideeffect "", "=a"()
77+
%a3 = call i32 asm sideeffect "", "=a"()
78+
%a4 = call <2 x i32> asm sideeffect "", "=a"()
79+
call void asm sideeffect "", "a,a,a"(<4 x i32> %a1, <4 x i32> %a2, i32 %a3)
80+
call void asm sideeffect "", "a"(<2 x i32> %a4)
8781
ret void
8882
}
8983

9084
; GCN-LABEL: {{^}}max_32regs_mfma32:
9185
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
9286
; A2M-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
9387
; A2V-NOT: SCRATCH_RSRC
94-
; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a0
88+
; GFX908-DAG: v_accvgpr_read_b32 v[[VSPILL:[0-9]+]], a0 ; Reload Reuse
9589
; A2M: buffer_store_dword v[[VSPILL]], off, s[{{[0-9:]+}}], 0 offset:[[FI:[0-9]+]] ; 4-byte Folded Spill
9690
; A2M: buffer_load_dword v[[VSPILL:[0-9]+]], off, s[{{[0-9:]+}}], 0 offset:[[FI]] ; 4-byte Folded Reload
97-
; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
91+
; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; Reload Reuse
9892
; A2V: ScratchSize: 0
9993
define amdgpu_kernel void @max_32regs_mfma32(float addrspace(1)* %arg) #3 {
10094
bb:
@@ -115,6 +109,6 @@ declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i3
115109
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
116110

117111
attributes #0 = { nounwind "amdgpu-num-vgpr"="24" }
118-
attributes #1 = { nounwind "amdgpu-num-vgpr"="8" }
112+
attributes #1 = { nounwind "amdgpu-num-vgpr"="10" }
119113
attributes #2 = { nounwind "amdgpu-num-vgpr"="12" }
120114
attributes #3 = { nounwind "amdgpu-num-vgpr"="32" }

llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll

Lines changed: 26 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,15 @@
55
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
66
; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
77
; GFX908-NOT: SCRATCH_RSRC
8-
; GFX908-DAG: v_accvgpr_write_b32 a0, v{{[0-9]}}
9-
; GFX908-DAG: v_accvgpr_write_b32 a1, v{{[0-9]}}
8+
; GFX908-DAG: v_accvgpr_write_b32 a0, v{{[0-9]}} ; Reload Reuse
9+
; GFX908-DAG: v_accvgpr_write_b32 a1, v{{[0-9]}} ; Reload Reuse
1010
; GFX900: buffer_store_dword v{{[0-9]}},
1111
; GFX900: buffer_store_dword v{{[0-9]}},
1212
; GFX900: buffer_load_dword v{{[0-9]}},
1313
; GFX900: buffer_load_dword v{{[0-9]}},
1414
; GFX908-NOT: buffer_
15-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a0
16-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1
15+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a0 ; Reload Reuse
16+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1 ; Reload Reuse
1717

1818
; GCN: NumVgprs: 10
1919
; GFX900: ScratchSize: 12
@@ -59,10 +59,10 @@ define amdgpu_kernel void @max_10_vgprs(i32 addrspace(1)* %p) #0 {
5959
; GCN-LABEL: {{^}}max_10_vgprs_used_9a:
6060
; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
6161
; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
62-
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
62+
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}} ; Reload Reuse
6363
; GFX908: buffer_store_dword v{{[0-9]}},
6464
; GFX908-NOT: buffer_
65-
; GFX908: v_accvgpr_read_b32 v{{[0-9]}}, a9
65+
; GFX908: v_accvgpr_read_b32 v{{[0-9]}}, a9 ; Reload Reuse
6666
; GFX908: buffer_load_dword v{{[0-9]}},
6767
; GFX908-NOT: buffer_
6868

@@ -113,28 +113,28 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #0 {
113113
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
114114
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
115115
; GFX908-DAG: v_accvgpr_write_b32 a0, 1
116-
; GFX908-DAG: v_accvgpr_write_b32 a1, v{{[0-9]}}
117-
; GFX908-DAG: v_accvgpr_write_b32 a2, v{{[0-9]}}
118-
; GFX908-DAG: v_accvgpr_write_b32 a3, v{{[0-9]}}
119-
; GFX908-DAG: v_accvgpr_write_b32 a4, v{{[0-9]}}
120-
; GFX908-DAG: v_accvgpr_write_b32 a5, v{{[0-9]}}
121-
; GFX908-DAG: v_accvgpr_write_b32 a6, v{{[0-9]}}
122-
; GFX908-DAG: v_accvgpr_write_b32 a7, v{{[0-9]}}
123-
; GFX908-DAG: v_accvgpr_write_b32 a8, v{{[0-9]}}
124-
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
116+
; GFX908-DAG: v_accvgpr_write_b32 a1, v{{[0-9]}} ; Reload Reuse
117+
; GFX908-DAG: v_accvgpr_write_b32 a2, v{{[0-9]}} ; Reload Reuse
118+
; GFX908-DAG: v_accvgpr_write_b32 a3, v{{[0-9]}} ; Reload Reuse
119+
; GFX908-DAG: v_accvgpr_write_b32 a4, v{{[0-9]}} ; Reload Reuse
120+
; GFX908-DAG: v_accvgpr_write_b32 a5, v{{[0-9]}} ; Reload Reuse
121+
; GFX908-DAG: v_accvgpr_write_b32 a6, v{{[0-9]}} ; Reload Reuse
122+
; GFX908-DAG: v_accvgpr_write_b32 a7, v{{[0-9]}} ; Reload Reuse
123+
; GFX908-DAG: v_accvgpr_write_b32 a8, v{{[0-9]}} ; Reload Reuse
124+
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}} ; Reload Reuse
125125
; GFX900: buffer_store_dword v{{[0-9]}},
126126
; GCN-DAG: buffer_store_dword v{{[0-9]}},
127127
; GFX900: buffer_load_dword v{{[0-9]}},
128128
; GCN-DAG: buffer_load_dword v{{[0-9]}},
129-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1
130-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a2
131-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a3
132-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a4
133-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a5
134-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a6
135-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a7
136-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a8
137-
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a9
129+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1 ; Reload Reuse
130+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a2 ; Reload Reuse
131+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a3 ; Reload Reuse
132+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a4 ; Reload Reuse
133+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a5 ; Reload Reuse
134+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a6 ; Reload Reuse
135+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a7 ; Reload Reuse
136+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a8 ; Reload Reuse
137+
; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a9 ; Reload Reuse
138138

139139
; GCN: NumVgprs: 10
140140
; GFX900: ScratchSize: 44
@@ -166,8 +166,8 @@ define amdgpu_kernel void @max_10_vgprs_used_1a_partial_spill(i64 addrspace(1)*
166166
; GCN-LABEL: {{^}}max_10_vgprs_spill_v32:
167167
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
168168
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
169-
; GFX908-DAG: v_accvgpr_write_b32 a0,
170-
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
169+
; GFX908-DAG: v_accvgpr_write_b32 a0, v{{[0-9]}} ; Reload Reuse
170+
; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}} ; Reload Reuse
171171
; GCN-NOT: a10
172172
; GCN: buffer_store_dword v{{[0-9]}},
173173

0 commit comments

Comments
 (0)