Skip to content

Commit 9e436c2

Browse files
authored
[MachineCP] Correctly handle register masks and sub-registers (#122734)
When passing an instruction with a register mask, the machine copy propagation pass was dropping the information about some copy instructions which define a register which is preserved by the mask, because that register overlaps a register which is partially clobbered by it. This resulted in a miscompilation for AArch64, because this caused a live copy to be considered dead. The fix is to clobber register masks by finding the set of reg units which is preserved by the mask, and clobbering all units not in that set. This is based on #122472, and fixes the compile time performance regressions which were caused by that.
1 parent 3a9380f commit 9e436c2

File tree

2 files changed

+121
-55
lines changed

2 files changed

+121
-55
lines changed

llvm/lib/CodeGen/MachineCopyPropagation.cpp

Lines changed: 91 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,32 @@ class CopyTracker {
117117

118118
DenseMap<MCRegUnit, CopyInfo> Copies;
119119

120+
// Memoised sets of register units which are preserved by each register mask,
121+
// needed to efficiently remove copies which are invalidated by call
122+
// instructions.
123+
DenseMap<const uint32_t *, BitVector> RegMaskToPreservedRegUnits;
124+
120125
public:
126+
/// Get the set of register units which are preserved by RegMaskOp.
127+
BitVector &getPreservedRegUnits(const MachineOperand &RegMaskOp,
128+
const TargetRegisterInfo &TRI) {
129+
const uint32_t *RegMask = RegMaskOp.getRegMask();
130+
auto Existing = RegMaskToPreservedRegUnits.find(RegMask);
131+
if (Existing != RegMaskToPreservedRegUnits.end()) {
132+
return Existing->second;
133+
} else {
134+
BitVector &PreservedRegUnits = RegMaskToPreservedRegUnits[RegMask];
135+
136+
PreservedRegUnits.resize(TRI.getNumRegUnits());
137+
for (unsigned SafeReg = 0, E = TRI.getNumRegs(); SafeReg < E; ++SafeReg)
138+
if (!RegMaskOp.clobbersPhysReg(SafeReg))
139+
for (auto SafeUnit : TRI.regunits(SafeReg))
140+
PreservedRegUnits.set(SafeUnit);
141+
142+
return PreservedRegUnits;
143+
}
144+
}
145+
121146
/// Mark all of the given registers and their subregisters as unavailable for
122147
/// copying.
123148
void markRegsUnavailable(ArrayRef<MCRegister> Regs,
@@ -164,64 +189,70 @@ class CopyTracker {
164189
Copies.erase(Unit);
165190
}
166191

167-
/// Clobber a single register, removing it from the tracker's copy maps.
168-
void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
169-
const TargetInstrInfo &TII, bool UseCopyInstr) {
170-
for (MCRegUnit Unit : TRI.regunits(Reg)) {
171-
auto I = Copies.find(Unit);
172-
if (I != Copies.end()) {
173-
// When we clobber the source of a copy, we need to clobber everything
174-
// it defined.
175-
markRegsUnavailable(I->second.DefRegs, TRI);
176-
// When we clobber the destination of a copy, we need to clobber the
177-
// whole register it defined.
178-
if (MachineInstr *MI = I->second.MI) {
179-
std::optional<DestSourcePair> CopyOperands =
180-
isCopyInstr(*MI, TII, UseCopyInstr);
181-
182-
MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
183-
MCRegister Src = CopyOperands->Source->getReg().asMCReg();
184-
185-
markRegsUnavailable(Def, TRI);
186-
187-
// Since we clobber the destination of a copy, the semantic of Src's
188-
// "DefRegs" to contain Def is no longer effectual. We will also need
189-
// to remove the record from the copy maps that indicates Src defined
190-
// Def. Failing to do so might cause the target to miss some
191-
// opportunities to further eliminate redundant copy instructions.
192-
// Consider the following sequence during the
193-
// ForwardCopyPropagateBlock procedure:
194-
// L1: r0 = COPY r9 <- TrackMI
195-
// L2: r0 = COPY r8 <- TrackMI (Remove r9 defined r0 from tracker)
196-
// L3: use r0 <- Remove L2 from MaybeDeadCopies
197-
// L4: early-clobber r9 <- Clobber r9 (L2 is still valid in tracker)
198-
// L5: r0 = COPY r8 <- Remove NopCopy
199-
for (MCRegUnit SrcUnit : TRI.regunits(Src)) {
200-
auto SrcCopy = Copies.find(SrcUnit);
201-
if (SrcCopy != Copies.end() && SrcCopy->second.LastSeenUseInCopy) {
202-
// If SrcCopy defines multiple values, we only need
203-
// to erase the record for Def in DefRegs.
204-
for (auto itr = SrcCopy->second.DefRegs.begin();
205-
itr != SrcCopy->second.DefRegs.end(); itr++) {
206-
if (*itr == Def) {
207-
SrcCopy->second.DefRegs.erase(itr);
208-
// If DefReg becomes empty after removal, we can remove the
209-
// SrcCopy from the tracker's copy maps. We only remove those
210-
// entries solely record the Def is defined by Src. If an
211-
// entry also contains the definition record of other Def'
212-
// registers, it cannot be cleared.
213-
if (SrcCopy->second.DefRegs.empty() && !SrcCopy->second.MI) {
214-
Copies.erase(SrcCopy);
215-
}
216-
break;
192+
/// Clobber a single register unit, removing it from the tracker's copy maps.
193+
void clobberRegUnit(MCRegUnit Unit, const TargetRegisterInfo &TRI,
194+
const TargetInstrInfo &TII, bool UseCopyInstr) {
195+
auto I = Copies.find(Unit);
196+
if (I != Copies.end()) {
197+
// When we clobber the source of a copy, we need to clobber everything
198+
// it defined.
199+
markRegsUnavailable(I->second.DefRegs, TRI);
200+
// When we clobber the destination of a copy, we need to clobber the
201+
// whole register it defined.
202+
if (MachineInstr *MI = I->second.MI) {
203+
std::optional<DestSourcePair> CopyOperands =
204+
isCopyInstr(*MI, TII, UseCopyInstr);
205+
206+
MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
207+
MCRegister Src = CopyOperands->Source->getReg().asMCReg();
208+
209+
markRegsUnavailable(Def, TRI);
210+
211+
// Since we clobber the destination of a copy, the semantic of Src's
212+
// "DefRegs" to contain Def is no longer effectual. We will also need
213+
// to remove the record from the copy maps that indicates Src defined
214+
// Def. Failing to do so might cause the target to miss some
215+
// opportunities to further eliminate redundant copy instructions.
216+
// Consider the following sequence during the
217+
// ForwardCopyPropagateBlock procedure:
218+
// L1: r0 = COPY r9 <- TrackMI
219+
// L2: r0 = COPY r8 <- TrackMI (Remove r9 defined r0 from tracker)
220+
// L3: use r0 <- Remove L2 from MaybeDeadCopies
221+
// L4: early-clobber r9 <- Clobber r9 (L2 is still valid in tracker)
222+
// L5: r0 = COPY r8 <- Remove NopCopy
223+
for (MCRegUnit SrcUnit : TRI.regunits(Src)) {
224+
auto SrcCopy = Copies.find(SrcUnit);
225+
if (SrcCopy != Copies.end() && SrcCopy->second.LastSeenUseInCopy) {
226+
// If SrcCopy defines multiple values, we only need
227+
// to erase the record for Def in DefRegs.
228+
for (auto itr = SrcCopy->second.DefRegs.begin();
229+
itr != SrcCopy->second.DefRegs.end(); itr++) {
230+
if (*itr == Def) {
231+
SrcCopy->second.DefRegs.erase(itr);
232+
// If DefReg becomes empty after removal, we can remove the
233+
// SrcCopy from the tracker's copy maps. We only remove those
234+
// entries solely record the Def is defined by Src. If an
235+
// entry also contains the definition record of other Def'
236+
// registers, it cannot be cleared.
237+
if (SrcCopy->second.DefRegs.empty() && !SrcCopy->second.MI) {
238+
Copies.erase(SrcCopy);
217239
}
240+
break;
218241
}
219242
}
220243
}
221244
}
222-
// Now we can erase the copy.
223-
Copies.erase(I);
224245
}
246+
// Now we can erase the copy.
247+
Copies.erase(I);
248+
}
249+
}
250+
251+
/// Clobber a single register, removing it from the tracker's copy maps.
252+
void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
253+
const TargetInstrInfo &TII, bool UseCopyInstr) {
254+
for (MCRegUnit Unit : TRI.regunits(Reg)) {
255+
clobberRegUnit(Unit, TRI, TII, UseCopyInstr);
225256
}
226257
}
227258

@@ -960,6 +991,9 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
960991
// a large set of registers. Treat clobbered registers the same way as
961992
// defined registers.
962993
if (RegMask) {
994+
BitVector &PreservedRegUnits =
995+
Tracker.getPreservedRegUnits(*RegMask, *TRI);
996+
963997
// Erase any MaybeDeadCopies whose destination register is clobbered.
964998
for (SmallSetVector<MachineInstr *, 8>::iterator DI =
965999
MaybeDeadCopies.begin();
@@ -978,9 +1012,11 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
9781012
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
9791013
MaybeDead->dump());
9801014

981-
// Make sure we invalidate any entries in the copy maps before erasing
982-
// the instruction.
983-
Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
1015+
// Invalidate all entries in the copy map which are not preserved by
1016+
// this register mask.
1017+
for (unsigned RegUnit : TRI->regunits(Reg))
1018+
if (!PreservedRegUnits.test(RegUnit))
1019+
Tracker.clobberRegUnit(RegUnit, *TRI, *TII, UseCopyInstr);
9841020

9851021
// erase() will return the next valid iterator pointing to the next
9861022
// element after the erased one.

llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
22
# RUN: llc -o - %s --run-pass=machine-cp -mcp-use-is-copy-instr -mtriple=arm64-apple-macos --verify-machineinstrs | FileCheck %s
33

4+
--- |
5+
declare void @foo()
6+
7+
define void @test() {
8+
unreachable
9+
}
10+
define void @test2() {
11+
unreachable
12+
}
13+
...
14+
415
---
516
name: test
617
tracksRegLiveness: true
@@ -30,3 +41,22 @@ body: |
3041
3142
RET undef $lr, implicit $x0
3243
...
44+
---
45+
name: test2
46+
tracksRegLiveness: true
47+
body: |
48+
bb.0:
49+
liveins: $q14, $d29, $x0, $x1
50+
; CHECK-LABEL: name: test2
51+
; CHECK: liveins: $q14, $d29, $x0, $x1
52+
; CHECK-NEXT: {{ $}}
53+
; CHECK-NEXT: renamable $d8 = COPY killed renamable $d29
54+
; CHECK-NEXT: BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
55+
; CHECK-NEXT: renamable $b0 = SMAXVv8i8v killed renamable $d8, implicit-def $q0
56+
; CHECK-NEXT: RET_ReallyLR implicit $b0
57+
renamable $q8 = COPY renamable $q14
58+
renamable $d8 = COPY killed renamable $d29
59+
BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
60+
renamable $b0 = SMAXVv8i8v killed renamable $d8, implicit-def $q0
61+
RET_ReallyLR implicit $b0
62+
...

0 commit comments

Comments
 (0)