Skip to content

Commit a907854

Browse files
committed
[NFC][LLVM] Make s_getpc_b64 rematerializable
1 parent b002b38 commit a907854

File tree

2 files changed

+134
-0
lines changed

2 files changed

+134
-0
lines changed

llvm/test/CodeGen/AMDGPU/remat-sop.mir

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,92 @@ body: |
573573
S_NOP 0, implicit %2
574574
S_ENDPGM 0
575575
...
576+
577+
---
578+
name: test_remat_s_getpc_b64
579+
tracksRegLiveness: true
580+
body: |
581+
bb.0:
; Purpose: three independent S_GETPC_B64 results (%0, %1, %2) are defined
; back to back and then consumed in the same order, so all three 64-bit
; values are live at the first S_NOP.
; The expected output below uses only $sgpr0_sgpr1 and $sgpr2_sgpr3; the
; first and third results are saved with SI_SPILL_S64_SAVE (%stack.1 and
; %stack.0) and reloaded with SI_SPILL_S64_RESTORE before their uses.
; NOTE(review): the RUN line is not visible in this hunk; presumably the
; allocator is restricted so that only two SGPR pairs are available - confirm.
; NOTE(review): despite the "remat" name, these checks record spill/reload
; (exactly three S_GETPC_B64, none re-emitted at a use); presumably this is
; the baseline before rematerialization is enabled - confirm.
582+
583+
; GCN-LABEL: name: test_remat_s_getpc_b64
584+
; GCN: renamable $sgpr0_sgpr1 = S_GETPC_B64
585+
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr0_sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.1, align 4, addrspace 5)
586+
; GCN-NEXT: renamable $sgpr2_sgpr3 = S_GETPC_B64
587+
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
588+
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5)
589+
; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5)
590+
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
591+
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
592+
; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
593+
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
594+
; GCN-NEXT: S_ENDPGM 0
; Input: the S_NOPs exist only to keep each S_GETPC_B64 result alive (they
; take the value as an implicit use) until after all three have been defined.
595+
%0:sgpr_64 = S_GETPC_B64
596+
%1:sgpr_64 = S_GETPC_B64
597+
%2:sgpr_64 = S_GETPC_B64
598+
S_NOP 0, implicit %0
599+
S_NOP 0, implicit %1
600+
S_NOP 0, implicit %2
601+
S_ENDPGM 0
602+
...
603+
604+
---
605+
name: test_remat_s_getpc_b64_2
606+
tracksRegLiveness: true
607+
body: |
608+
bb.0:
; Purpose: like test_remat_s_getpc_b64, but the three S_GETPC_B64 results
; are read through their sub0/sub1 halves instead of as whole 64-bit values.
; Each result is split by COPYs into two sreg_32 values, and every pairing
; of the three results (%0+%1, %0+%2, %1+%2) is added with an
; S_ADD_U32 / S_ADDC_U32 pair. The sums (%10-%15) are never read, which is
; why the expected defs below are marked `dead`.
; The expected output uses eight stack slots: %stack.6 and %stack.7 hold
; whole 64-bit results, %stack.0-%stack.5 hold the individual 32-bit halves.
; NOTE(review): the RUN line is not visible in this hunk; presumably register
; pressure is artificially restricted - confirm.
; NOTE(review): despite the "remat" name, the checks contain exactly three
; S_GETPC_B64 and rely on spill/reload; presumably this is the baseline
; before rematerialization is enabled - confirm.
609+
610+
; GCN-LABEL: name: test_remat_s_getpc_b64_2
611+
; GCN: renamable $sgpr0_sgpr1 = S_GETPC_B64
612+
; GCN-NEXT: renamable $sgpr2_sgpr3 = S_GETPC_B64
613+
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.7, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.7, align 4, addrspace 5)
614+
; GCN-NEXT: renamable $sgpr2_sgpr3 = S_GETPC_B64
615+
; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr2_sgpr3, %stack.6, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.6, align 4, addrspace 5)
616+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.3, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.3, addrspace 5)
617+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5)
618+
; GCN-NEXT: renamable $sgpr2_sgpr3 = SI_SPILL_S64_RESTORE %stack.7, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.7, align 4, addrspace 5)
619+
; GCN-NEXT: renamable $sgpr1 = COPY renamable $sgpr2
620+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5)
621+
; GCN-NEXT: renamable $sgpr1 = COPY killed renamable $sgpr3
622+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.2, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.2, addrspace 5)
623+
; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.6, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.6, align 4, addrspace 5)
624+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.5, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.5, addrspace 5)
625+
; GCN-NEXT: renamable $sgpr0 = COPY killed renamable $sgpr1
626+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.4, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.4, addrspace 5)
627+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
628+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
629+
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr1, killed renamable $sgpr0, implicit-def $scc
630+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
631+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
632+
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
633+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
634+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
635+
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
636+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
637+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
638+
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
639+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
640+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
641+
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
642+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
643+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
644+
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
645+
; GCN-NEXT: S_ENDPGM 0
; Input: %4/%5, %6/%7 and %8/%9 are the low/high halves of %0, %1 and %2.
; Each S_ADDC_U32 takes $scc as an implicit use, i.e. the value defined by
; the S_ADD_U32 immediately before it, so the ADD/ADDC pairs must stay
; adjacent and in this order.
646+
%0:sreg_64 = S_GETPC_B64
647+
%1:sreg_64 = S_GETPC_B64
648+
%2:sreg_64 = S_GETPC_B64
649+
%4:sreg_32 = COPY %0.sub0:sreg_64
650+
%5:sreg_32 = COPY %0.sub1:sreg_64
651+
%6:sreg_32 = COPY %1.sub0:sreg_64
652+
%7:sreg_32 = COPY %1.sub1:sreg_64
653+
%8:sreg_32 = COPY %2.sub0:sreg_64
654+
%9:sreg_32 = COPY %2.sub1:sreg_64
655+
%10:sreg_32 = S_ADD_U32 %4:sreg_32, %6:sreg_32, implicit-def $scc
656+
%11:sreg_32 = S_ADDC_U32 %5:sreg_32, %7:sreg_32, implicit-def $scc, implicit $scc
657+
%12:sreg_32 = S_ADD_U32 %4:sreg_32, %8:sreg_32, implicit-def $scc
658+
%13:sreg_32 = S_ADDC_U32 %5:sreg_32, %9:sreg_32, implicit-def $scc, implicit $scc
659+
%14:sreg_32 = S_ADD_U32 %6:sreg_32, %8:sreg_32, implicit-def $scc
660+
%15:sreg_32 = S_ADDC_U32 %7:sreg_32, %9:sreg_32, implicit-def $scc, implicit $scc
661+
S_ENDPGM 0
662+
...
663+
664+
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stress-regalloc=2 -verify-machineinstrs < %s | FileCheck %s
3+
4+
5+
; End-to-end llc test: the result of @llvm.amdgcn.s.getpc() is used once
; before and once after an inline asm that clobbers s0-s31, so the value has
; to survive that clobber.
; The expected assembly below contains a single s_getpc_b64; its result is
; parked in lanes 0/1 of v1 with v_writelane_b32 and read back with
; v_readlane_b32 after the asm blocks, rather than s_getpc_b64 being issued
; a second time.
; NOTE(review): despite the "remat" name, these checks record the
; save/restore sequence; presumably this is the baseline before
; rematerialization is enabled - confirm.
define void @test_remat_s_getpc_b64() {
6+
; CHECK-LABEL: test_remat_s_getpc_b64:
7+
; CHECK: ; %bb.0: ; %entry
8+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
10+
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
11+
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
12+
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
13+
; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
14+
; CHECK-NEXT: s_getpc_b64 s[4:5]
15+
; CHECK-NEXT: v_writelane_b32 v0, s30, 0
16+
; CHECK-NEXT: v_writelane_b32 v1, s4, 0
17+
; CHECK-NEXT: v_writelane_b32 v0, s31, 1
18+
; CHECK-NEXT: v_writelane_b32 v1, s5, 1
19+
; CHECK-NEXT: ;;#ASMSTART
20+
; CHECK-NEXT: ;;#ASMEND
21+
; CHECK-NEXT: ;;#ASMSTART
22+
; CHECK-NEXT: ;;#ASMEND
23+
; CHECK-NEXT: v_readlane_b32 s4, v1, 0
24+
; CHECK-NEXT: v_readlane_b32 s5, v1, 1
25+
; CHECK-NEXT: v_mov_b32_e32 v2, s4
26+
; CHECK-NEXT: v_mov_b32_e32 v3, s5
27+
; CHECK-NEXT: global_store_dwordx2 v[1:2], v[2:3], off
28+
; CHECK-NEXT: v_readlane_b32 s31, v0, 1
29+
; CHECK-NEXT: v_readlane_b32 s30, v0, 0
30+
; CHECK-NEXT: ; kill: killed $vgpr1
31+
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
32+
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
33+
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
34+
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
35+
; CHECK-NEXT: s_waitcnt vmcnt(0)
36+
; CHECK-NEXT: s_setpc_b64 s[30:31]
37+
entry:
38+
%0 = tail call i64 @llvm.amdgcn.s.getpc()
; First use: empty asm with an "s" input constraint, which takes %0 as a
; scalar-register operand before the clobber.
39+
tail call void asm sideeffect "", "s"(i64 %0)
; Empty asm whose only effect is its clobber list (s0 through s31).
40+
tail call void asm sideeffect "", "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"()
; Second use: placed after the clobber so %0 is still live across it.
41+
store i64 %0, ptr addrspace(1) undef
42+
ret void
43+
}
44+
45+
declare i64 @llvm.amdgcn.s.getpc()

0 commit comments

Comments
 (0)