Skip to content

Commit 8f454eb

Browse files
committed
Ignore AMDGPU::SI_SPILL_S32_TO_VGPR in STRICT_WWM regions.
1 parent 24a245e commit 8f454eb

File tree

3 files changed: 269 additions and 188 deletions.

llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,9 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
215215
MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
216216
RegsAssigned |= processDef(MI.getOperand(0));
217217

218+
if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
219+
continue;
220+
218221
if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
219222
MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM ||
220223
MI.getOpcode() == AMDGPU::ENTER_PSEUDO_WM) {

llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll

Lines changed: 50 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -559,27 +559,28 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
559559
; GFX9-O0-LABEL: strict_wwm_call_i64:
560560
; GFX9-O0: ; %bb.0:
561561
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562-
; GFX9-O0-NEXT: s_mov_b32 s46, s33
562+
; GFX9-O0-NEXT: s_mov_b32 s48, s33
563563
; GFX9-O0-NEXT: s_mov_b32 s33, s32
564564
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
565565
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s33 ; 4-byte Folded Spill
566-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
567-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
568-
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
569-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
570-
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
571-
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
566+
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
567+
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
568+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
572569
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
573-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
570+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
571+
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
574572
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
575-
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
576573
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
574+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
577575
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
578-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
579-
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
576+
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
577+
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
578+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
579+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
580+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
580581
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
581582
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x1000
582-
; GFX9-O0-NEXT: ; implicit-def: $vgpr11 : SGPR spill to VGPR lane
583+
; GFX9-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
583584
; GFX9-O0-NEXT: v_writelane_b32 v8, s30, 0
584585
; GFX9-O0-NEXT: v_writelane_b32 v8, s31, 1
585586
; GFX9-O0-NEXT: s_mov_b32 s34, s8
@@ -597,10 +598,10 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
597598
; GFX9-O0-NEXT: s_mov_b32 s41, s45
598599
; GFX9-O0-NEXT: s_mov_b32 s42, s44
599600
; GFX9-O0-NEXT: s_mov_b32 s43, s35
600-
; GFX9-O0-NEXT: v_writelane_b32 v11, s40, 0
601-
; GFX9-O0-NEXT: v_writelane_b32 v11, s41, 1
602-
; GFX9-O0-NEXT: v_writelane_b32 v11, s42, 2
603-
; GFX9-O0-NEXT: v_writelane_b32 v11, s43, 3
601+
; GFX9-O0-NEXT: v_writelane_b32 v0, s40, 0
602+
; GFX9-O0-NEXT: v_writelane_b32 v0, s41, 1
603+
; GFX9-O0-NEXT: v_writelane_b32 v0, s42, 2
604+
; GFX9-O0-NEXT: v_writelane_b32 v0, s43, 3
604605
; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35
605606
; GFX9-O0-NEXT: s_mov_b32 s35, s9
606607
; GFX9-O0-NEXT: ; kill: def $sgpr36_sgpr37 killed $sgpr34_sgpr35
@@ -612,8 +613,11 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
612613
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s37
613614
; GFX9-O0-NEXT: s_not_b64 exec, exec
614615
; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
615-
; GFX9-O0-NEXT: v_writelane_b32 v11, s34, 4
616-
; GFX9-O0-NEXT: v_writelane_b32 v11, s35, 5
616+
; GFX9-O0-NEXT: v_writelane_b32 v0, s34, 4
617+
; GFX9-O0-NEXT: v_writelane_b32 v0, s35, 5
618+
; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
619+
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
620+
; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
617621
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
618622
; GFX9-O0-NEXT: s_mov_b32 s34, 32
619623
; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
@@ -630,13 +634,20 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
630634
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
631635
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
632636
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35]
633-
; GFX9-O0-NEXT: v_readlane_b32 s34, v11, 4
634-
; GFX9-O0-NEXT: v_readlane_b32 s35, v11, 5
635-
; GFX9-O0-NEXT: v_readlane_b32 s36, v11, 0
636-
; GFX9-O0-NEXT: v_readlane_b32 s37, v11, 1
637-
; GFX9-O0-NEXT: v_readlane_b32 s38, v11, 2
638-
; GFX9-O0-NEXT: v_readlane_b32 s39, v11, 3
637+
; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
638+
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
639+
; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
640+
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
641+
; GFX9-O0-NEXT: v_readlane_b32 s34, v6, 4
642+
; GFX9-O0-NEXT: v_readlane_b32 s35, v6, 5
643+
; GFX9-O0-NEXT: v_readlane_b32 s36, v6, 0
644+
; GFX9-O0-NEXT: v_readlane_b32 s37, v6, 1
645+
; GFX9-O0-NEXT: v_readlane_b32 s38, v6, 2
646+
; GFX9-O0-NEXT: v_readlane_b32 s39, v6, 3
639647
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
648+
; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
649+
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
650+
; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
640651
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
641652
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
642653
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
@@ -645,29 +656,30 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
645656
; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4
646657
; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41]
647658
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
648-
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
649-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
659+
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
660+
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3
650661
; GFX9-O0-NEXT: s_mov_b32 s34, 0
651-
; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4
662+
; GFX9-O0-NEXT: buffer_store_dwordx2 v[6:7], off, s[36:39], s34 offset:4
652663
; GFX9-O0-NEXT: v_readlane_b32 s31, v8, 1
653664
; GFX9-O0-NEXT: v_readlane_b32 s30, v8, 0
654-
; GFX9-O0-NEXT: ; kill: killed $vgpr11
665+
; GFX9-O0-NEXT: ; kill: killed $vgpr0
655666
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
656667
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload
657-
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
658-
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
659-
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
660-
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
661-
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
662-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
668+
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
669+
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
670+
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
671+
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
672+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
663673
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
664-
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
674+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
665675
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
666-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
667-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
676+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
677+
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
678+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
679+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
668680
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
669681
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff000
670-
; GFX9-O0-NEXT: s_mov_b32 s33, s46
682+
; GFX9-O0-NEXT: s_mov_b32 s33, s48
671683
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
672684
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
673685
;

0 commit comments

Comments
 (0)