Commit 9bda1de
[TwoAddressInstruction] Propagate undef flags for partial defs (#79286)
If part of a register (lowered from a REG_SEQUENCE) is undefined, we should propagate undef flags to uses of those lanes. This is only done when live intervals are present, since they are needed to correctly match uses to defs; the primary goal is to allow precise computation of subrange intervals.
1 parent ea4f44e commit 9bda1de
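
To make the idea concrete, here is a minimal, self-contained sketch of the lane-mask bookkeeping this change performs. It is not LLVM code: LaneBitmask is modelled as a plain 16-bit mask and the operand machinery is reduced to a small struct; names such as SubRegUse, collectUndefLanes and markUndefUses are invented for illustration only.

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

using LaneMask = std::uint16_t; // one bit per 32-bit lane of a 512-bit register

struct SubRegUse {
  LaneMask Lanes;       // lanes this use reads (e.g. sub14_sub15 -> 0xC000)
  bool IsUndef = false; // undef flag on the use operand
};

// Lowering a REG_SEQUENCE-style definition: source operands flagged undef
// contribute their lanes to the set of undefined lanes instead of producing
// a subregister copy.
LaneMask collectUndefLanes(const std::vector<std::pair<LaneMask, bool>> &Sources) {
  LaneMask UndefLanes = 0;
  for (const auto &[Lanes, IsUndef] : Sources)
    if (IsUndef)
      UndefLanes |= Lanes;
  return UndefLanes;
}

// Propagate undef to every subregister use whose lanes overlap the undefined
// lanes, mirroring the (UndefLanes & LaneMask).any() check in the patch.
void markUndefUses(std::vector<SubRegUse> &Uses, LaneMask UndefLanes) {
  for (SubRegUse &Use : Uses)
    if (Use.Lanes & UndefLanes)
      Use.IsUndef = true;
}

int main() {
  // sub0..sub13 are defined; sub14_sub15 come from an undef source.
  LaneMask UndefLanes = collectUndefLanes({{0x3FFF, false}, {0xC000, true}});
  std::vector<SubRegUse> Uses = {{0x0001}, {0x4000}, {0xC000}};
  markUndefUses(Uses, UndefLanes);
  for (const SubRegUse &Use : Uses)
    std::printf("lanes 0x%04x -> %s\n", unsigned(Use.Lanes),
                Use.IsUndef ? "undef" : "live");
  return 0;
}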

File tree

2 files changed: +151 -6 lines changed
llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Lines changed: 26 additions & 6 deletions
@@ -1929,21 +1929,27 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
   Register DstReg = MI.getOperand(0).getReg();

   SmallVector<Register, 4> OrigRegs;
+  VNInfo *DefVN = nullptr;
   if (LIS) {
     OrigRegs.push_back(MI.getOperand(0).getReg());
     for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
       OrigRegs.push_back(MI.getOperand(i).getReg());
+    if (LIS->hasInterval(DstReg)) {
+      DefVN = LIS->getInterval(DstReg)
+                  .Query(LIS->getInstructionIndex(MI))
+                  .valueOut();
+    }
   }

+  LaneBitmask UndefLanes = LaneBitmask::getNone();
   bool DefEmitted = false;
-  bool DefIsPartial = false;
   for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
     MachineOperand &UseMO = MI.getOperand(i);
     Register SrcReg = UseMO.getReg();
     unsigned SubIdx = MI.getOperand(i+1).getImm();
     // Nothing needs to be inserted for undef operands.
     if (UseMO.isUndef()) {
-      DefIsPartial = true;
+      UndefLanes |= TRI->getSubRegIndexLaneMask(SubIdx);
       continue;
     }

@@ -1991,11 +1997,25 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
       MI.removeOperand(j);
   } else {
     if (LIS) {
-      // Force interval recomputation if we moved from full definition
-      // of register to partial.
-      if (DefIsPartial && LIS->hasInterval(DstReg) &&
-          MRI->shouldTrackSubRegLiveness(DstReg))
+      // Force live interval recomputation if we moved to a partial definition
+      // of the register. Undef flags must be propagated to uses of the
+      // undefined subregisters for accurate interval computation.
+      if (UndefLanes.any() && DefVN && MRI->shouldTrackSubRegLiveness(DstReg)) {
+        auto &LI = LIS->getInterval(DstReg);
+        for (MachineOperand &UseOp : MRI->use_operands(DstReg)) {
+          unsigned SubReg = UseOp.getSubReg();
+          if (UseOp.isUndef() || !SubReg)
+            continue;
+          auto *VN =
+              LI.getVNInfoAt(LIS->getInstructionIndex(*UseOp.getParent()));
+          if (DefVN != VN)
+            continue;
+          LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+          if ((UndefLanes & LaneMask).any())
+            UseOp.setIsUndef(true);
+        }
         LIS->removeInterval(DstReg);
+      }
       LIS->RemoveMachineInstrFromMaps(MI);
     }

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -early-live-intervals -run-pass=liveintervals -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: dyn_extract_v7f64_v_v
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+
+    ; CHECK-LABEL: name: dyn_extract_v7f64_v_v
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr12
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr13
+    ; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub0:vreg_64 = COPY [[COPY]]
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]].sub1:vreg_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: undef [[COPY15:%[0-9]+]].sub0:vreg_64 = COPY [[COPY2]]
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:vreg_64 = COPY [[COPY3]]
+    ; CHECK-NEXT: undef [[COPY16:%[0-9]+]].sub0:vreg_64 = COPY [[COPY4]]
+    ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:vreg_64 = COPY [[COPY5]]
+    ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub0:vreg_64 = COPY [[COPY6]]
+    ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:vreg_64 = COPY [[COPY7]]
+    ; CHECK-NEXT: undef [[COPY18:%[0-9]+]].sub0:vreg_64 = COPY [[COPY8]]
+    ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:vreg_64 = COPY [[COPY9]]
+    ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub0:vreg_64 = COPY [[COPY10]]
+    ; CHECK-NEXT: [[COPY19:%[0-9]+]].sub1:vreg_64 = COPY [[COPY11]]
+    ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub0:vreg_64 = COPY [[COPY12]]
+    ; CHECK-NEXT: [[COPY20:%[0-9]+]].sub1:vreg_64 = COPY [[COPY13]]
+    ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY $vgpr14
+    ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0_sub1:vreg_512 = COPY [[COPY14]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub2_sub3:vreg_512 = COPY [[COPY15]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub4_sub5:vreg_512 = COPY [[COPY16]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub6_sub7:vreg_512 = COPY [[COPY17]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub8_sub9:vreg_512 = COPY [[COPY18]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub10_sub11:vreg_512 = COPY [[COPY19]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub12_sub13:vreg_512 = COPY [[COPY20]]
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 1, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub0, 0, [[COPY22]].sub2, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub1, 0, [[COPY22]].sub3, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 2, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY22]].sub4, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_1]], 0, [[COPY22]].sub5, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 3, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_4:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_2]], 0, [[COPY22]].sub6, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_5:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_3]], 0, [[COPY22]].sub7, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 4, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_6:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_4]], 0, [[COPY22]].sub8, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_7:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_5]], 0, [[COPY22]].sub9, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 5, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_8:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_6]], 0, [[COPY22]].sub10, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_9:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_7]], 0, [[COPY22]].sub11, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 6, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_10:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_8]], 0, [[COPY22]].sub12, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_11:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_9]], 0, [[COPY22]].sub13, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_6:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 7, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_12:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_10]], 0, undef [[COPY22]].sub14, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_13:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_11]], 0, undef [[COPY22]].sub15, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_12]]
+    ; CHECK-NEXT: $vgpr1 = COPY [[V_CNDMASK_B32_e64_13]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    %2:vgpr_32 = COPY $vgpr0
+    %3:vgpr_32 = COPY $vgpr1
+    %4:vgpr_32 = COPY $vgpr2
+    %5:vgpr_32 = COPY $vgpr3
+    %6:vgpr_32 = COPY $vgpr4
+    %7:vgpr_32 = COPY $vgpr5
+    %8:vgpr_32 = COPY $vgpr6
+    %9:vgpr_32 = COPY $vgpr7
+    %10:vgpr_32 = COPY $vgpr8
+    %11:vgpr_32 = COPY $vgpr9
+    %12:vgpr_32 = COPY $vgpr10
+    %13:vgpr_32 = COPY $vgpr11
+    %14:vgpr_32 = COPY $vgpr12
+    %15:vgpr_32 = COPY $vgpr13
+    %16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %17:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
+    %18:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %7, %subreg.sub1
+    %19:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1
+    %20:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %11, %subreg.sub1
+    %21:vreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1
+    %22:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1
+    %1:vgpr_32 = COPY $vgpr14
+    %34:vreg_512 = REG_SEQUENCE %16, %subreg.sub0_sub1, %17, %subreg.sub2_sub3, %18, %subreg.sub4_sub5, %19, %subreg.sub6_sub7, %20, %subreg.sub8_sub9, %21, %subreg.sub10_sub11, %22, %subreg.sub12_sub13, undef %35:vreg_64, %subreg.sub14_sub15
+    %55:sreg_64_xexec = V_CMP_EQ_U32_e64 1, %1, implicit $exec
+    %56:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub0, 0, %34.sub2, %55, implicit $exec
+    %57:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub1, 0, %34.sub3, %55, implicit $exec
+    %59:sreg_64_xexec = V_CMP_EQ_U32_e64 2, %1, implicit $exec
+    %60:vgpr_32 = V_CNDMASK_B32_e64 0, %56, 0, %34.sub4, %59, implicit $exec
+    %61:vgpr_32 = V_CNDMASK_B32_e64 0, %57, 0, %34.sub5, %59, implicit $exec
+    %63:sreg_64_xexec = V_CMP_EQ_U32_e64 3, %1, implicit $exec
+    %64:vgpr_32 = V_CNDMASK_B32_e64 0, %60, 0, %34.sub6, %63, implicit $exec
+    %65:vgpr_32 = V_CNDMASK_B32_e64 0, %61, 0, %34.sub7, %63, implicit $exec
+    %67:sreg_64_xexec = V_CMP_EQ_U32_e64 4, %1, implicit $exec
+    %68:vgpr_32 = V_CNDMASK_B32_e64 0, %64, 0, %34.sub8, %67, implicit $exec
+    %69:vgpr_32 = V_CNDMASK_B32_e64 0, %65, 0, %34.sub9, %67, implicit $exec
+    %71:sreg_64_xexec = V_CMP_EQ_U32_e64 5, %1, implicit $exec
+    %72:vgpr_32 = V_CNDMASK_B32_e64 0, %68, 0, %34.sub10, %71, implicit $exec
+    %73:vgpr_32 = V_CNDMASK_B32_e64 0, %69, 0, %34.sub11, %71, implicit $exec
+    %75:sreg_64_xexec = V_CMP_EQ_U32_e64 6, %1, implicit $exec
+    %76:vgpr_32 = V_CNDMASK_B32_e64 0, %72, 0, %34.sub12, %75, implicit $exec
+    %77:vgpr_32 = V_CNDMASK_B32_e64 0, %73, 0, %34.sub13, %75, implicit $exec
+    %79:sreg_64_xexec = V_CMP_EQ_U32_e64 7, %1, implicit $exec
+    %80:vgpr_32 = V_CNDMASK_B32_e64 0, %76, 0, %34.sub14, %79, implicit $exec
+    %81:vgpr_32 = V_CNDMASK_B32_e64 0, %77, 0, %34.sub15, %79, implicit $exec
+    $vgpr0 = COPY %80
+    $vgpr1 = COPY %81
+    SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
