Skip to content

Commit 6752f15

Browse files
authored
[TwoAddressInstruction] Recompute live intervals for partial defs (#74431)
Force live interval recomputation for a register if its definition is narrowed to become partial. The live interval repair process cannot otherwise detect these changes.
1 parent cc0065a commit 6752f15

File tree

6 files changed

+67
-2
lines changed

6 files changed

+67
-2
lines changed

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1936,13 +1936,16 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
19361936
}
19371937

19381938
bool DefEmitted = false;
1939+
bool DefIsPartial = false;
19391940
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
19401941
MachineOperand &UseMO = MI.getOperand(i);
19411942
Register SrcReg = UseMO.getReg();
19421943
unsigned SubIdx = MI.getOperand(i+1).getImm();
19431944
// Nothing needs to be inserted for undef operands.
1944-
if (UseMO.isUndef())
1945+
if (UseMO.isUndef()) {
1946+
DefIsPartial = true;
19451947
continue;
1948+
}
19461949

19471950
// Defer any kill flag to the last operand using SrcReg. Otherwise, we
19481951
// might insert a COPY that uses SrcReg after is was killed.
@@ -1987,8 +1990,14 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
19871990
for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
19881991
MI.removeOperand(j);
19891992
} else {
1990-
if (LIS)
1993+
if (LIS) {
1994+
// Force interval recomputation if we moved from full definition
1995+
// of register to partial.
1996+
if (DefIsPartial && LIS->hasInterval(DstReg) &&
1997+
MRI->shouldTrackSubRegLiveness(DstReg))
1998+
LIS->removeInterval(DstReg);
19911999
LIS->RemoveMachineInstrFromMaps(MI);
2000+
}
19922001

19932002
LLVM_DEBUG(dbgs() << "Eliminated: " << MI);
19942003
MI.eraseFromParent();

llvm/test/CodeGen/AMDGPU/ds_gws_align.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
33
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
44
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
5+
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -early-live-intervals -o - -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
56

67
; GCN-LABEL: {{^}}gws_init_odd_reg:
78
; GFX908-DAG: ds_gws_init v1 gds
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=liveintervals -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefix=GFX90A %s
3+
4+
---
5+
name: aligned_partial_vgpr_64
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1
10+
11+
; GFX90A-LABEL: name: aligned_partial_vgpr_64
12+
; GFX90A: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1
13+
; GFX90A-NEXT: {{ $}}
14+
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
15+
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
16+
; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr7
17+
; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr6
18+
; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr5
19+
; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr4
20+
; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr3
21+
; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr2
22+
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr1
23+
; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr0
24+
; GFX90A-NEXT: undef [[COPY10:%[0-9]+]].sub0:sgpr_256 = COPY [[COPY9]]
25+
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub1:sgpr_256 = COPY [[COPY8]]
26+
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub2:sgpr_256 = COPY [[COPY7]]
27+
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub3:sgpr_256 = COPY [[COPY6]]
28+
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub4:sgpr_256 = COPY [[COPY5]]
29+
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub5:sgpr_256 = COPY [[COPY4]]
30+
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub6:sgpr_256 = COPY [[COPY3]]
31+
; GFX90A-NEXT: [[COPY10:%[0-9]+]].sub7:sgpr_256 = COPY [[COPY2]]
32+
; GFX90A-NEXT: undef [[COPY11:%[0-9]+]].sub0:vreg_64_align2 = COPY [[COPY]]
33+
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
34+
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a [[COPY12]], [[COPY11]].sub0, [[COPY10]], 1, -1, 1, 0, 0, 0, implicit $exec, implicit [[COPY11]] :: (volatile dereferenceable load store (s32), addrspace 8)
35+
; GFX90A-NEXT: $vgpr0 = COPY [[COPY12]]
36+
; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
37+
%9:vgpr_32 = COPY $vgpr1
38+
%8:vgpr_32 = COPY $vgpr0
39+
%7:sgpr_32 = COPY $sgpr7
40+
%6:sgpr_32 = COPY $sgpr6
41+
%5:sgpr_32 = COPY $sgpr5
42+
%4:sgpr_32 = COPY $sgpr4
43+
%3:sgpr_32 = COPY $sgpr3
44+
%2:sgpr_32 = COPY $sgpr2
45+
%1:sgpr_32 = COPY $sgpr1
46+
%0:sgpr_32 = COPY $sgpr0
47+
%11:sgpr_256 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3, %4:sgpr_32, %subreg.sub4, %5:sgpr_32, %subreg.sub5, %6:sgpr_32, %subreg.sub6, %7:sgpr_32, %subreg.sub7
48+
%14:vreg_64_align2 = REG_SEQUENCE %9:vgpr_32, %subreg.sub0, undef %13:vgpr_32, %subreg.sub1
49+
%12:vgpr_32 = IMAGE_ATOMIC_SWAP_V1_V1_gfx90a %8:vgpr_32, %14.sub0:vreg_64_align2, %11:sgpr_256, 1, -1, 1, 0, 0, 0, implicit $exec, implicit %14:vreg_64_align2 :: (volatile dereferenceable load store (s32), addrspace 8)
50+
$vgpr0 = COPY %12:vgpr_32
51+
SI_RETURN_TO_EPILOG $vgpr0
52+
...

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
22
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
33
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX90A %s
4+
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -early-live-intervals < %s | FileCheck -check-prefixes=GCN,GFX90A %s
45
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
56
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
67
; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX12 %s

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -early-live-intervals < %s | FileCheck -check-prefixes=GCN %s
23
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
34

45
; GCN-LABEL: {{^}}load_1d:

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx90a -early-live-intervals -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
23
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s
34

45
; GFX90A-LABEL: {{^}}sample_1d:

0 commit comments

Comments
 (0)