Skip to content

Commit 376b22a

Browse files
authored
[LLVM] Make s_getpc_b64 rematerializable (llvm#71823)
1 parent c43c885 commit 376b22a

File tree

3 files changed

+120
-0
lines changed

3 files changed

+120
-0
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64", [], 1>;
292292
def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32", [], 1>;
293293
def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64", [], 1>;
294294

295+
// S_GETPC_B64 is flagged as rematerializable. Its selection pattern maps the
// int_amdgcn_s_getpc intrinsic onto this instruction, writing a 64-bit $sdst.
// NOTE(review): a rematerialized copy would execute at a different program
// point than the original def -- confirm that every user of the intrinsic
// tolerates receiving a different PC value.
let isReMaterializable = 1 in
295296
def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64",
296297
[(set i64:$sdst, (int_amdgcn_s_getpc))]
297298
>;

llvm/test/CodeGen/AMDGPU/remat-sop.mir

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,84 @@ body: |
573573
S_NOP 0, implicit %2
574574
S_ENDPGM 0
575575
...
576+
577+
---
# Three back-to-back S_GETPC_B64 defs (%0, %1, %2), each consumed by its own
# S_NOP. The expected output keeps two defs up front and places the third
# between the second and third S_NOP, reusing $sgpr0_sgpr1, with no spill
# instructions.
# NOTE(review): the RUN line is outside this view -- presumably it limits the
# available registers so that only two 64-bit values fit at once; confirm.
578+
name: test_remat_s_getpc_b64
579+
tracksRegLiveness: true
580+
body: |
581+
bb.0:
582+
583+
; GCN-LABEL: name: test_remat_s_getpc_b64
584+
; GCN: renamable $sgpr0_sgpr1 = S_GETPC_B64
585+
; GCN-NEXT: renamable $sgpr2_sgpr3 = S_GETPC_B64
586+
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
587+
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
588+
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
589+
; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
590+
; GCN-NEXT: S_ENDPGM 0
591+
; Input: all three values are defined before any of them is used.
%0:sgpr_64 = S_GETPC_B64
592+
%1:sgpr_64 = S_GETPC_B64
593+
%2:sgpr_64 = S_GETPC_B64
594+
S_NOP 0, implicit %0
595+
S_NOP 0, implicit %1
596+
S_NOP 0, implicit %2
597+
S_ENDPGM 0
598+
...
599+
600+
---
# Variant in which the three S_GETPC_B64 results are sreg_64 values that are
# read only through sub0/sub1 COPYs feeding S_ADD_U32/S_ADDC_U32 pairs.
# In the expected output the 32-bit halves go through SI_SPILL_S32_SAVE /
# SI_SPILL_S32_RESTORE, and the third S_GETPC_B64 is emitted only after all
# four halves of the first two values have been saved.
# NOTE(review): presumably the sub-register uses are what prevent the simple
# placement seen in test_remat_s_getpc_b64 -- confirm against the allocator.
601+
name: test_remat_s_getpc_b64_2
602+
tracksRegLiveness: true
603+
body: |
604+
bb.0:
605+
606+
; GCN-LABEL: name: test_remat_s_getpc_b64_2
607+
; GCN: renamable $sgpr0_sgpr1 = S_GETPC_B64
608+
; GCN-NEXT: renamable $sgpr2_sgpr3 = S_GETPC_B64
609+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.3, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.3, addrspace 5)
610+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.0, addrspace 5)
611+
; GCN-NEXT: renamable $sgpr1 = COPY renamable $sgpr2
612+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.1, addrspace 5)
613+
; GCN-NEXT: renamable $sgpr1 = COPY killed renamable $sgpr3
614+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.2, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.2, addrspace 5)
615+
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
616+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.5, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.5, addrspace 5)
617+
; GCN-NEXT: renamable $sgpr0 = COPY killed renamable $sgpr1
618+
; GCN-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.4, implicit $exec, implicit $sp_reg :: (store (s32) into %stack.4, addrspace 5)
619+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
620+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
621+
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr1, killed renamable $sgpr0, implicit-def $scc
622+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
623+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
624+
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
625+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.3, addrspace 5)
626+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
627+
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
628+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.0, addrspace 5)
629+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
630+
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
631+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.1, addrspace 5)
632+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.5, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.5, addrspace 5)
633+
; GCN-NEXT: dead renamable $sgpr0 = S_ADD_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc
634+
; GCN-NEXT: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.2, addrspace 5)
635+
; GCN-NEXT: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.4, implicit $exec, implicit $sp_reg :: (load (s32) from %stack.4, addrspace 5)
636+
; GCN-NEXT: dead renamable $sgpr0 = S_ADDC_U32 killed renamable $sgpr0, killed renamable $sgpr1, implicit-def $scc, implicit $scc
637+
; GCN-NEXT: S_ENDPGM 0
638+
; Input: three 64-bit PC reads, all defined before any use.
%0:sreg_64 = S_GETPC_B64
639+
%1:sreg_64 = S_GETPC_B64
640+
%2:sreg_64 = S_GETPC_B64
641+
; Split each 64-bit value into its sub0 and sub1 halves.
%4:sreg_32 = COPY %0.sub0:sreg_64
642+
%5:sreg_32 = COPY %0.sub1:sreg_64
643+
%6:sreg_32 = COPY %1.sub0:sreg_64
644+
%7:sreg_32 = COPY %1.sub1:sreg_64
645+
%8:sreg_32 = COPY %2.sub0:sreg_64
646+
%9:sreg_32 = COPY %2.sub1:sreg_64
647+
; Pairwise add/add-with-carry over every combination of the three values;
; the results %10-%15 have no users in this function.
%10:sreg_32 = S_ADD_U32 %4:sreg_32, %6:sreg_32, implicit-def $scc
648+
%11:sreg_32 = S_ADDC_U32 %5:sreg_32, %7:sreg_32, implicit-def $scc, implicit $scc
649+
%12:sreg_32 = S_ADD_U32 %4:sreg_32, %8:sreg_32, implicit-def $scc
650+
%13:sreg_32 = S_ADDC_U32 %5:sreg_32, %9:sreg_32, implicit-def $scc, implicit $scc
651+
%14:sreg_32 = S_ADD_U32 %6:sreg_32, %8:sreg_32, implicit-def $scc
652+
%15:sreg_32 = S_ADDC_U32 %7:sreg_32, %9:sreg_32, implicit-def $scc, implicit $scc
653+
S_ENDPGM 0
654+
...
655+
656+
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stress-regalloc=2 -verify-machineinstrs < %s | FileCheck %s
3+
4+
5+
; Reads the PC once via llvm.amdgcn.s.getpc, passes it to an inline asm with an
; "s" constraint, runs a second inline asm that clobbers s0-s31, and then
; stores the same IR value. The expected assembly below contains s_getpc_b64
; twice (before the asm blocks and again before the store); the only lane
; writes/reads shown are for s30 and s31.
; NOTE(review): the two s_getpc_b64 instructions sit at different addresses, so
; the stored value presumably differs from the one the first asm saw -- confirm
; this is acceptable for users of the intrinsic.
define void @test_remat_s_getpc_b64() {
6+
; CHECK-LABEL: test_remat_s_getpc_b64:
7+
; CHECK: ; %bb.0: ; %entry
8+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
10+
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
11+
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
12+
; CHECK-NEXT: v_writelane_b32 v0, s30, 0
13+
; CHECK-NEXT: s_getpc_b64 s[4:5]
14+
; CHECK-NEXT: v_writelane_b32 v0, s31, 1
15+
; CHECK-NEXT: ;;#ASMSTART
16+
; CHECK-NEXT: ;;#ASMEND
17+
; CHECK-NEXT: ;;#ASMSTART
18+
; CHECK-NEXT: ;;#ASMEND
19+
; CHECK-NEXT: s_getpc_b64 s[4:5]
20+
; CHECK-NEXT: v_mov_b32_e32 v1, s4
21+
; CHECK-NEXT: v_mov_b32_e32 v2, s5
22+
; CHECK-NEXT: global_store_dwordx2 v[1:2], v[1:2], off
23+
; CHECK-NEXT: v_readlane_b32 s31, v0, 1
24+
; CHECK-NEXT: v_readlane_b32 s30, v0, 0
25+
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
26+
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
27+
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
28+
; CHECK-NEXT: s_waitcnt vmcnt(0)
29+
; CHECK-NEXT: s_setpc_b64 s[30:31]
30+
entry:
31+
%0 = tail call i64 @llvm.amdgcn.s.getpc()
32+
; First use: the value must be in scalar registers for this empty asm.
tail call void asm sideeffect "", "s"(i64 %0)
33+
; Clobber s0-s31 so the value cannot simply stay live in those registers.
tail call void asm sideeffect "", "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"()
34+
; Second use, after the clobber.
; NOTE(review): the store address is undef -- confirm whether a pointer
; argument or poison is preferred by current test conventions.
store i64 %0, ptr addrspace(1) undef
35+
ret void
36+
}
37+
38+
declare i64 @llvm.amdgcn.s.getpc()

0 commit comments

Comments
 (0)