Skip to content

Commit f5266f5

Browse files
committed
[RegisterCoalescer] Further changes on top
Changes made compared to 0e46b49:
* The code in `updateRegDefsUses` now updates subranges when subreg-liveness tracking is enabled.
* When an implicit-def operand is added for the super-register, the code in `reMaterializeTrivialDef` that removes undefined subranges now takes into account that the lanes of the super-register are no longer undefined.
* Any operand that defines a register through a sub-register index must set `FullDef` to false, because it defines only part of the register.
* Live ranges are now computed for all regunits of a physical register that is added as an implicit-def, as required by subsequent passes; see the conversation on #76416.
1 parent 19d65fb commit f5266f5

19 files changed

+506
-108
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,6 +1434,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14341434

14351435
// CopyMI may have implicit operands, save them so that we can transfer them
14361436
// over to the newly materialized instruction after CopyMI is removed.
1437+
LaneBitmask NewMIImplicitOpsMask;
14371438
SmallVector<MachineOperand, 4> ImplicitOps;
14381439
ImplicitOps.reserve(CopyMI->getNumOperands() -
14391440
CopyMI->getDesc().getNumOperands());
@@ -1447,6 +1448,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14471448
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
14481449
"unexpected implicit virtual register def");
14491450
ImplicitOps.push_back(MO);
1451+
if (MO.isDef() && MO.getReg().isVirtual() &&
1452+
MRI->shouldTrackSubRegLiveness(DstReg))
1453+
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
14501454
}
14511455
}
14521456

@@ -1489,14 +1493,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14891493
} else {
14901494
assert(MO.getReg() == NewMI.getOperand(0).getReg());
14911495

1492-
// We're only expecting another def of the main output, so the range
1493-
// should get updated with the regular output range.
1494-
//
1495-
// FIXME: The range updating below probably needs updating to look at
1496-
// the super register if subranges are tracked.
1497-
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
1498-
"subrange update for implicit-def of super register may not be "
1499-
"properly handled");
1496+
// If lanemasks need to be tracked, compile the lanemask of the NewMI
1497+
// implicit def operands to avoid subranges for the super-regs from
1498+
// being removed by code later on in this function.
1499+
if (MRI->shouldTrackSubRegLiveness(DstReg))
1500+
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
15001501
}
15011502
}
15021503
}
@@ -1596,7 +1597,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
15961597
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
15971598
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
15981599
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1599-
if ((SR.LaneMask & DstMask).none()) {
1600+
if ((SR.LaneMask & DstMask).none() &&
1601+
(SR.LaneMask & NewMIImplicitOpsMask).none()) {
16001602
LLVM_DEBUG(dbgs()
16011603
<< "Removing undefined SubRange "
16021604
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1881,6 +1883,14 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
18811883
}
18821884
}
18831885

1886+
// If DstInt already has a subrange for the unused lanes, then we shouldn't
1887+
// create duplicate subranges when we update the interval for unused lanes.
1888+
LaneBitmask DefinedLanes;
1889+
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
1890+
for (LiveInterval::SubRange &SR : DstInt->subranges())
1891+
DefinedLanes |= SR.LaneMask;
1892+
}
1893+
18841894
SmallPtrSet<MachineInstr*, 8> Visited;
18851895
for (MachineRegisterInfo::reg_instr_iterator
18861896
I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
@@ -1915,9 +1925,7 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19151925
// versa.
19161926
if (SubIdx && MO.isDef()) {
19171927
MO.setIsUndef(!Reads);
1918-
1919-
if (!Reads)
1920-
FullDef = false;
1928+
FullDef = false;
19211929
}
19221930

19231931
// A subreg use of a partially undef (super) register may be a complete
@@ -1960,12 +1968,25 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
19601968
// have been relying on those bits, so we need to ensure their liveness is
19611969
// captured with a def of other lanes.
19621970

1963-
// FIXME: Need to add new subrange if tracking subranges. We could also
1964-
// skip adding this if we knew the other lanes are dead, and only for
1965-
// other lanes.
1971+
if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
1972+
assert(DstInt->hasSubRanges() &&
1973+
"SUBREG_TO_REG should have resulted in subrange");
1974+
LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1975+
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1976+
LaneBitmask UnusedLanes = DstMask & ~UsedLanes & ~DefinedLanes;
1977+
if ((UnusedLanes).any()) {
1978+
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
1979+
DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
1980+
DefinedLanes |= UnusedLanes;
1981+
}
1982+
} else if (DstIsPhys) {
1983+
// Ensure we have a computed liverange for all regunits,
1984+
// as this is required by the scheduler/regpressure tracker,
1985+
// see: https://github.com/llvm/llvm-project/issues/76416
1986+
for (MCRegUnit Unit : TRI->regunits(DstReg))
1987+
LIS->getRegUnit(Unit);
1988+
}
19661989

1967-
assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
1968-
"this should update subranges");
19691990
MachineInstrBuilder MIB(*MF, UseMI);
19701991
MIB.addReg(DstReg, RegState::ImplicitDefine);
19711992
}

llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s
2+
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s
3+
; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true < %s | FileCheck %s
34

45
; Check there's no assert in spilling from implicit-def operands on an
56
; IMPLICIT_DEF.
@@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
9293
; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Folded Reload
9394
; CHECK-NEXT: mov x0, xzr
9495
; CHECK-NEXT: mov x1, xzr
95-
; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8
9696
; CHECK-NEXT: str x8, [sp]
9797
; CHECK-NEXT: bl _fprintf
9898
; CHECK-NEXT: brk #0x1

llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp {
2727
; CHECK-NEXT: sub sp, sp, #208
2828
; CHECK-NEXT: mov w8, #10 ; =0xa
2929
; CHECK-NEXT: mov w9, #9 ; =0x9
30-
; CHECK-NEXT: mov w10, #8 ; =0x8
30+
; CHECK-NEXT: mov w0, #1 ; =0x1
3131
; CHECK-NEXT: stp x9, x8, [sp, #24]
32-
; CHECK-NEXT: mov w8, #7 ; =0x7
32+
; CHECK-NEXT: mov w8, #8 ; =0x8
3333
; CHECK-NEXT: mov w9, #6 ; =0x6
34-
; CHECK-NEXT: mov w0, #1 ; =0x1
34+
; CHECK-NEXT: str x8, [sp, #16]
35+
; CHECK-NEXT: mov w8, #7 ; =0x7
3536
; CHECK-NEXT: mov w1, #2 ; =0x2
3637
; CHECK-NEXT: mov w2, #3 ; =0x3
3738
; CHECK-NEXT: mov w3, #4 ; =0x4
@@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp {
4647
; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill
4748
; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill
4849
; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill
49-
; CHECK-NEXT: stp x8, x10, [sp, #8]
50-
; CHECK-NEXT: str x9, [sp]
50+
; CHECK-NEXT: stp x9, x8, [sp]
5151
; CHECK-NEXT: bl _callee
5252
; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
5353
; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -enable-subreg-liveness=false < %s | FileCheck %s
3+
; RUN: llc -enable-subreg-liveness=true < %s | FileCheck %s
4+
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
6+
target triple = "aarch64-unknown-linux-gnu"
7+
8+
define void @_ZN4llvm5APInt6divideEPKmjS2_jPmS3_(i32 %lhsWords, i32 %rhsWords) {
9+
; CHECK-LABEL: _ZN4llvm5APInt6divideEPKmjS2_jPmS3_:
10+
; CHECK: // %bb.0:
11+
; CHECK-NEXT: lsl w9, w0, #1
12+
; CHECK-NEXT: mov w10, #1 // =0x1
13+
; CHECK-NEXT: mov w8, w0
14+
; CHECK-NEXT: mov w0, #1 // =0x1
15+
; CHECK-NEXT: sub w9, w9, w1, lsl #1
16+
; CHECK-NEXT: bfi w0, w8, #1, #31
17+
; CHECK-NEXT: lsr w9, w9, #1
18+
; CHECK-NEXT: bfi w10, w9, #2, #30
19+
; CHECK-NEXT: cmp w10, #0
20+
; CHECK-NEXT: b.hs .LBB0_2
21+
; CHECK-NEXT: // %bb.1: // %if.then15
22+
; CHECK-NEXT: lsl x8, x0, #2
23+
; CHECK-NEXT: ldr xzr, [x8]
24+
; CHECK-NEXT: ret
25+
; CHECK-NEXT: .LBB0_2:
26+
; CHECK-NEXT: b _Znam
27+
%mul = shl i32 %rhsWords, 1
28+
%mul1 = shl i32 %lhsWords, 1
29+
%sub = sub i32 %mul1, %mul
30+
%add7 = or i32 %mul1, 1
31+
%idxprom = zext i32 %add7 to i64
32+
%mul3 = shl i32 %sub, 1
33+
%add4 = or i32 %mul3, 1
34+
%1 = icmp ult i32 %add4, 0
35+
br i1 %1, label %if.then15, label %3
36+
37+
common.ret: ; preds = %3, %if.then15
38+
ret void
39+
40+
if.then15: ; preds = %0
41+
%idxprom12 = zext i32 %add7 to i64
42+
%arrayidx13 = getelementptr [128 x i32], ptr null, i64 0, i64 %idxprom12
43+
%2 = load volatile ptr, ptr %arrayidx13, align 8
44+
br label %common.ret
45+
46+
3: ; preds = %0
47+
%call = tail call ptr @_Znam(i64 %idxprom)
48+
br label %common.ret
49+
}
50+
51+
declare ptr @_Znam(i64)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s
4+
---
5+
name: test
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $x1
10+
; CHECK-LABEL: name: test
11+
; CHECK: liveins: $x1
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: renamable $x0 = COPY $x1
14+
; CHECK-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
15+
; CHECK-NEXT: RET_ReallyLR implicit $x1, implicit $x0
16+
%190:gpr64 = COPY killed $x1
17+
%191:gpr32 = COPY %190.sub_32:gpr64
18+
%192:gpr32 = ORRWrr $wzr, killed %191:gpr32
19+
%193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32
20+
$x0 = COPY killed %190:gpr64
21+
$x1 = COPY killed %193:gpr64all
22+
RET_ReallyLR implicit $x1, implicit $x0
23+
...

llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
# CHECK-DBG: ********** JOINING INTERVALS ***********
88
# CHECK-DBG: ********** INTERVALS **********
99
# CHECK-DBG: %0 [16r,32r:0) 0@16r weight:0.000000e+00
10-
# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00
11-
# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [112e,112d:0) 0@112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00
10+
# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000080 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00
11+
# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00
1212
# CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0@112r 1@32r weight:0.000000e+00
1313
---
1414
name: test
@@ -43,7 +43,7 @@ body: |
4343
# CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0@48B-phi 1@320r 2@32r
4444
# CHECK-DBG-SAME: weight:0.000000e+00
4545
# CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi
46-
# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0@288r 1@x 2@x 3@304B-phi
46+
# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@x 3@304B-phi
4747
# CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi
4848
# CHECK-DBG-SAME: weight:0.000000e+00
4949
---
@@ -127,3 +127,55 @@ body: |
127127
B %bb.1
128128
129129
...
130+
# Test that the interval `L0000000000000080 [112r,112d:1)` is not removed,
131+
# when removing undefined subranges.
132+
#
133+
# CHECK-DBG: ********** REGISTER COALESCER **********
134+
# CHECK-DBG: ********** Function: reproducer3
135+
# CHECK-DBG: ********** JOINING INTERVALS ***********
136+
# CHECK-DBG: ********** INTERVALS **********
137+
# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0@0B-phi 1@320r
138+
# CHECK-DBG: W1 [0B,16r:0) 0@0B-phi
139+
# CHECK-DBG: %0 [16r,64r:0) 0@16r weight:0.000000e+00
140+
# CHECK-DBG: %1 [32r,128r:0) 0@32r weight:0.000000e+00
141+
# CHECK-DBG: %2 [48r,64r:0) 0@48r weight:0.000000e+00
142+
# CHECK-DBG: %3 [64r,80r:0) 0@64r weight:0.000000e+00
143+
# CHECK-DBG: %4 [80r,176r:0) 0@80r weight:0.000000e+00
144+
# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
145+
# CHECK-DBG-SAME: L0000000000000080 [112r,112d:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
146+
# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r
147+
# CHECK-DBG-SAME: weight:0.000000e+00
148+
# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0@176r 1@96r weight:0.000000e+00
149+
# CHECK-DBG: %9 [256r,272r:0) 0@256r weight:0.000000e+00
150+
---
151+
name: reproducer3
152+
tracksRegLiveness: true
153+
body: |
154+
bb.0:
155+
liveins: $w0, $w1
156+
157+
%0:gpr32 = COPY killed $w1
158+
%1:gpr32 = COPY killed $w0
159+
%3:gpr32 = UBFMWri %1, 31, 30
160+
%4:gpr32 = SUBWrs killed %3, killed %0, 1
161+
%5:gpr32 = UBFMWri killed %4, 1, 31
162+
%6:gpr32 = MOVi32imm 1
163+
%7:gpr32 = COPY %6
164+
%7:gpr32 = BFMWri %7, killed %1, 31, 30
165+
%8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32
166+
%9:gpr32common = COPY killed %6
167+
%9:gpr32common = BFMWri %9, killed %5, 30, 29
168+
dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv
169+
Bcc 2, %bb.2, implicit killed $nzcv
170+
B %bb.1
171+
172+
bb.1:
173+
%10:gpr64common = UBFMXri killed %8, 62, 61
174+
dead $xzr = LDRXui killed %10, 0
175+
RET_ReallyLR
176+
177+
bb.2:
178+
$x0 = COPY killed %8
179+
RET_ReallyLR implicit killed $x0
180+
181+
...

llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,21 +750,25 @@ entry:
750750
define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
751751
; CHECK-64-LABEL: testDoubleImm1:
752752
; CHECK-64: # %bb.0: # %entry
753+
; CHECK-64-NEXT: # kill: def $f1 killed $f1 def $vsl1
753754
; CHECK-64-NEXT: xxpermdi 34, 1, 34, 1
754755
; CHECK-64-NEXT: blr
755756
;
756757
; CHECK-32-LABEL: testDoubleImm1:
757758
; CHECK-32: # %bb.0: # %entry
759+
; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
758760
; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1
759761
; CHECK-32-NEXT: blr
760762
;
761763
; CHECK-64-P10-LABEL: testDoubleImm1:
762764
; CHECK-64-P10: # %bb.0: # %entry
765+
; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
763766
; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1
764767
; CHECK-64-P10-NEXT: blr
765768
;
766769
; CHECK-32-P10-LABEL: testDoubleImm1:
767770
; CHECK-32-P10: # %bb.0: # %entry
771+
; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
768772
; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1
769773
; CHECK-32-P10-NEXT: blr
770774
entry:

0 commit comments

Comments
 (0)