Skip to content

Commit 1f0aadf

Browse files
committed
[AMDGPU] Fix kill flag on overlapping sgpr copy
As with vgpr copies, we cannot kill the source register if it overlaps with the destination register. Otherwise, the kill of the source register would also count as a kill of the destination register.

Differential Revision: https://reviews.llvm.org/D120042
1 parent 0136a44 commit 1f0aadf

File tree

2 files changed

+52
-1
lines changed

2 files changed

+52
-1
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
930930
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
931931
return;
932932
}
933-
expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
933+
const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
934+
expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC,
935+
Forward);
934936
return;
935937
}
936938

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s
3+
4+
# Don't set a kill of the super register on the last instruction with
5+
# an overlapping copy. This would kill part of the values in the
6+
# result copies.
7+
8+
---
9+
name: overlapping_copy_kill_undef_reg_after_copy
10+
tracksRegLiveness: true
11+
body: |
12+
bb.0:
13+
liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
14+
15+
; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy
16+
; CHECK: liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
17+
; CHECK-NEXT: {{ $}}
18+
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
19+
; CHECK-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
20+
; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
21+
; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
22+
; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
23+
; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
24+
renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
25+
renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
26+
S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
27+
28+
...
29+
30+
---
31+
name: nonoverlapping_copy_kill
32+
tracksRegLiveness: true
33+
body: |
34+
bb.0:
35+
liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
36+
37+
; CHECK-LABEL: name: nonoverlapping_copy_kill
38+
; CHECK: liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
39+
; CHECK-NEXT: {{ $}}
40+
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
41+
; CHECK-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
42+
; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit killed $sgpr3_sgpr4_sgpr5
43+
; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
44+
; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
45+
renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr3_sgpr4_sgpr5
46+
renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
47+
S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
48+
49+
...

0 commit comments

Comments
 (0)