Skip to content

Commit 5f0ea4f

Browse files
committed
[AMDGPU] Reduce attaching of implicit defines to spill
Extension of https://reviews.llvm.org/D141101 to remove the define flag from the last stack memory access. Fixes case where COPY instructions are used for some of the stack restoration, but the copies get optimized away during the machine-cp pass. Prior to this change, was possible to produce the following code: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_ST 64, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.17, align 4, addrspace 5) $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_ST 80, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.17 + 16, align 4, addrspace 5) $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_ST 96, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.17 + 32, align 4, addrspace 5) $agpr31 = COPY $agpr112, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 $agpr30 = COPY $agpr208, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 $agpr28_agpr29 = SCRATCH_LOAD_DWORDX2_ST 112, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s64) from %stack.17 + 48, align 4, addrspace 5) where `$agpr30 = COPY $agpr208` would be optimized away by `machine-cp` pass. Instead, change to: $agpr28_agpr29 = SCRATCH_LOAD_DWORDX2_ST 112, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.17 + 48, align 4, addrspace 5) Fixes #131386.
1 parent 2b06410 commit 5f0ea4f

9 files changed

+408
-406
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,8 +1910,10 @@ void SIRegisterInfo::buildSpillLoadStore(
19101910
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
19111911
}
19121912

1913-
if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1914-
MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1913+
if (NeedSuperRegImpOperand && IsFirstSubReg) {
1914+
unsigned State = RegState::Implicit | SrcDstRegState;
1915+
MIB.addReg(ValueReg, State);
1916+
}
19151917

19161918
// The epilog restore of a wwm-scratch register can cause undesired
19171919
// optimization during machine-cp post PrologEpilogInserter if the same

llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,7 @@ body: |
10461046
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
10471047
; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
10481048
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
1049-
; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
1049+
; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
10501050
; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
10511051
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
10521052
; GFX908-NEXT: {{ $}}
@@ -1293,7 +1293,7 @@ body: |
12931293
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
12941294
; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
12951295
; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5)
1296-
; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1 + 4, addrspace 5)
1296+
; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
12971297
; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
12981298
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
12991299
; GFX90A-NEXT: {{ $}}
@@ -1544,7 +1544,7 @@ body: |
15441544
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
15451545
; GFX908-FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
15461546
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 4, addrspace 5)
1547-
; GFX908-FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
1547+
; GFX908-FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
15481548
; GFX908-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
15491549
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
15501550
; GFX908-FLATSCR-NEXT: {{ $}}
@@ -2071,7 +2071,7 @@ body: |
20712071
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
20722072
; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
20732073
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 8, addrspace 5)
2074-
; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
2074+
; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
20752075
; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
20762076
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
20772077
; GFX908-NEXT: {{ $}}
@@ -2319,7 +2319,7 @@ body: |
23192319
; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
23202320
; GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5)
23212321
; GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
2322-
; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1 + 8, addrspace 5)
2322+
; GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 8, addrspace 5)
23232323
; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
23242324
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
23252325
; GFX90A-NEXT: {{ $}}
@@ -2572,7 +2572,7 @@ body: |
25722572
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 4, addrspace 5)
25732573
; GFX908-FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
25742574
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 8, addrspace 5)
2575-
; GFX908-FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
2575+
; GFX908-FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
25762576
; GFX908-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
25772577
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
25782578
; GFX908-FLATSCR-NEXT: {{ $}}
@@ -4111,10 +4111,10 @@ body: |
41114111
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
41124112
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
41134113
; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
4114-
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1
4114+
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
41154115
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
4116-
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
4117-
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
4116+
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
4117+
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
41184118
; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
41194119
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
41204120
; GFX908-NEXT: {{ $}}
@@ -4361,7 +4361,7 @@ body: |
43614361
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
43624362
; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
43634363
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
4364-
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
4364+
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
43654365
; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
43664366
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
43674367
; GFX90A-NEXT: {{ $}}
@@ -4609,10 +4609,10 @@ body: |
46094609
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
46104610
; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
46114611
; GFX908-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
4612-
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1
4612+
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
46134613
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
4614-
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
4615-
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
4614+
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
4615+
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1 + 4, addrspace 5)
46164616
; GFX908-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
46174617
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
46184618
; GFX908-FLATSCR-NEXT: {{ $}}
@@ -5132,12 +5132,12 @@ body: |
51325132
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
51335133
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
51345134
; GFX908-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
5135-
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
5135+
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
51365136
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
51375137
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
51385138
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
5139-
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
5140-
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
5139+
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
5140+
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 8, addrspace 5)
51415141
; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
51425142
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
51435143
; GFX908-NEXT: {{ $}}
@@ -5385,7 +5385,7 @@ body: |
53855385
; GFX90A-NEXT: $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
53865386
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
53875387
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
5388-
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
5388+
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 8, addrspace 5)
53895389
; GFX90A-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
53905390
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
53915391
; GFX90A-NEXT: {{ $}}
@@ -5633,12 +5633,12 @@ body: |
56335633
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
56345634
; GFX908-FLATSCR-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
56355635
; GFX908-FLATSCR-NEXT: $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
5636-
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
5636+
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
56375637
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
56385638
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
56395639
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1 + 4, addrspace 5)
5640-
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
5641-
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
5640+
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
5641+
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1 + 8, addrspace 5)
56425642
; GFX908-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
56435643
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
56445644
; GFX908-FLATSCR-NEXT: {{ $}}

llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,16 +65,16 @@ body: |
6565
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
6666
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
6767
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
68-
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
68+
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
6969
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
70-
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: (store (s32) into %stack.2 + 4, addrspace 5)
70+
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.2 + 4, addrspace 5)
7171
; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
7272
; GCN-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1, addrspace 5)
73-
; GCN-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1 + 4, addrspace 5)
73+
; GCN-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
7474
; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
7575
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
7676
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2, addrspace 5)
77-
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2 + 4, addrspace 5)
77+
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.2 + 4, addrspace 5)
7878
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
7979
; GCN-NEXT: {{ $}}
8080
; GCN-NEXT: bb.2:

0 commit comments

Comments
 (0)