Skip to content

Commit e2e3f06

Browse files
committed
Revert "[MachineScheduler] Track physical register dependencies per-regunit"
This reverts commit 1a54671. It was causing lit test failures in an LLVM_ENABLE_EXPENSIVE_CHECKS build.
1 parent eb1617a commit e2e3f06

File tree

12 files changed

+190
-190
lines changed

12 files changed

+190
-190
lines changed

llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,11 @@ namespace llvm {
7777
struct PhysRegSUOper {
7878
SUnit *SU;
7979
int OpIdx;
80-
unsigned RegUnit;
80+
unsigned Reg;
8181

82-
PhysRegSUOper(SUnit *su, int op, unsigned R)
83-
: SU(su), OpIdx(op), RegUnit(R) {}
82+
PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {}
8483

85-
unsigned getSparseSetIndex() const { return RegUnit; }
84+
unsigned getSparseSetIndex() const { return Reg; }
8685
};
8786

8887
/// Use a SparseMultiSet to track physical registers. Storage is only

llvm/lib/CodeGen/ScheduleDAGInstrs.cpp

Lines changed: 26 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
211211
for (const MachineOperand &MO : ExitMI->all_uses()) {
212212
Register Reg = MO.getReg();
213213
if (Reg.isPhysical()) {
214-
for (MCRegUnit Unit : TRI->regunits(Reg))
215-
Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
214+
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
216215
} else if (Reg.isVirtual() && MO.readsReg()) {
217216
addVRegUseDeps(&ExitSU, MO.getOperandNo());
218217
}
@@ -223,11 +222,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
223222
// uses all the registers that are livein to the successor blocks.
224223
for (const MachineBasicBlock *Succ : BB->successors()) {
225224
for (const auto &LI : Succ->liveins()) {
226-
// TODO: Use LI.LaneMask to refine this.
227-
for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
228-
if (!Uses.contains(Unit))
229-
Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
230-
}
225+
if (!Uses.contains(LI.PhysReg))
226+
Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
231227
}
232228
}
233229
}
@@ -248,8 +244,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
248244
const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc();
249245
bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() &&
250246
!DefMIDesc.hasImplicitDefOfPhysReg(Reg));
251-
for (MCRegUnit Unit : TRI->regunits(Reg)) {
252-
for (Reg2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end(); ++I) {
247+
for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
248+
for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
253249
SUnit *UseSU = I->SU;
254250
if (UseSU == SU)
255251
continue;
@@ -266,14 +262,11 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
266262
// Set the hasPhysRegDefs only for physreg defs that have a use within
267263
// the scheduling region.
268264
SU->hasPhysRegDefs = true;
269-
270265
UseInstr = UseSU->getInstr();
271-
Register UseReg = UseInstr->getOperand(UseOpIdx).getReg();
272266
const MCInstrDesc &UseMIDesc = UseInstr->getDesc();
273-
ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
274-
!UseMIDesc.hasImplicitUseOfPhysReg(UseReg);
275-
276-
Dep = SDep(SU, SDep::Data, UseReg);
267+
ImplicitPseudoUse = (UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
268+
!UseMIDesc.hasImplicitUseOfPhysReg(*Alias));
269+
Dep = SDep(SU, SDep::Data, *Alias);
277270
}
278271
if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
279272
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
@@ -307,16 +300,15 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
307300
// TODO: Using a latency of 1 here for output dependencies assumes
308301
// there's no cost for reusing registers.
309302
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
310-
for (MCRegUnit Unit : TRI->regunits(Reg)) {
311-
for (Reg2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end(); ++I) {
303+
for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
304+
for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
312305
SUnit *DefSU = I->SU;
313306
if (DefSU == &ExitSU)
314307
continue;
315308
MachineInstr *DefInstr = DefSU->getInstr();
316-
MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx);
317-
if (DefSU != SU &&
318-
(Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) {
319-
SDep Dep(SU, Kind, DefMO.getReg());
309+
if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() ||
310+
!DefInstr->registerDefIsDead(*Alias))) {
311+
SDep Dep(SU, Kind, /*Reg=*/*Alias);
320312
if (Kind != SDep::Anti) {
321313
Dep.setLatency(
322314
SchedModel.computeOutputLatency(MI, OperIdx, DefInstr));
@@ -332,42 +324,37 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
332324
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
333325
// retrieve the existing SUnits list for this register's uses.
334326
// Push this SUnit on the use list.
335-
for (MCRegUnit Unit : TRI->regunits(Reg))
336-
Uses.insert(PhysRegSUOper(SU, OperIdx, Unit));
327+
Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
337328
if (RemoveKillFlags)
338329
MO.setIsKill(false);
339330
} else {
340331
addPhysRegDataDeps(SU, OperIdx);
341332

342333
// Clear previous uses and defs of this register and its subregisters.
343-
for (MCRegUnit Unit : TRI->regunits(Reg)) {
344-
Uses.eraseAll(Unit);
334+
for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
335+
Uses.eraseAll(SubReg);
345336
if (!MO.isDead())
346-
Defs.eraseAll(Unit);
337+
Defs.eraseAll(SubReg);
347338
}
348-
349339
if (MO.isDead() && SU->isCall) {
350340
// Calls will not be reordered because of chain dependencies (see
351341
// below). Since call operands are dead, calls may continue to be added
352342
// to the DefList making dependence checking quadratic in the size of
353343
// the block. Instead, we leave only one call at the back of the
354344
// DefList.
355-
for (MCRegUnit Unit : TRI->regunits(Reg)) {
356-
Reg2SUnitsMap::RangePair P = Defs.equal_range(Unit);
357-
Reg2SUnitsMap::iterator B = P.first;
358-
Reg2SUnitsMap::iterator I = P.second;
359-
for (bool isBegin = I == B; !isBegin; /* empty */) {
360-
isBegin = (--I) == B;
361-
if (!I->SU->isCall)
362-
break;
363-
I = Defs.erase(I);
364-
}
345+
Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
346+
Reg2SUnitsMap::iterator B = P.first;
347+
Reg2SUnitsMap::iterator I = P.second;
348+
for (bool isBegin = I == B; !isBegin; /* empty */) {
349+
isBegin = (--I) == B;
350+
if (!I->SU->isCall)
351+
break;
352+
I = Defs.erase(I);
365353
}
366354
}
367355

368356
// Defs are pushed in the order they are visited and never reordered.
369-
for (MCRegUnit Unit : TRI->regunits(Reg))
370-
Defs.insert(PhysRegSUOper(SU, OperIdx, Unit));
357+
Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
371358
}
372359
}
373360

llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,11 +1363,11 @@ define amdgpu_kernel void @v_copysign_out_f16_mag_f16_sign_f64(ptr addrspace(1)
13631363
; VI-NEXT: flat_load_dwordx2 v[1:2], v[1:2]
13641364
; VI-NEXT: s_waitcnt vmcnt(0)
13651365
; VI-NEXT: v_mov_b32_e32 v1, s7
1366-
; VI-NEXT: flat_load_ushort v3, v[0:1]
13671366
; VI-NEXT: s_movk_i32 s0, 0x7fff
1367+
; VI-NEXT: flat_load_ushort v3, v[0:1]
1368+
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
13681369
; VI-NEXT: v_mov_b32_e32 v0, s4
13691370
; VI-NEXT: v_mov_b32_e32 v1, s5
1370-
; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
13711371
; VI-NEXT: s_waitcnt vmcnt(0)
13721372
; VI-NEXT: v_bfi_b32 v2, s0, v3, v2
13731373
; VI-NEXT: flat_store_short v[0:1], v2

llvm/test/CodeGen/AMDGPU/load-global-i16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3788,13 +3788,13 @@ define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(ptr addrspace(1) %ou
37883788
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v33, 16, v2
37893789
; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v34, 0xffff, v3
37903790
; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, 0xffff, v2
3791-
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v39, 16, v1
3792-
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v0
37933791
; GCN-NOHSA-VI-NEXT: buffer_store_dword v32, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill
37943792
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
37953793
; GCN-NOHSA-VI-NEXT: buffer_store_dword v33, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill
37963794
; GCN-NOHSA-VI-NEXT: buffer_store_dword v34, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill
37973795
; GCN-NOHSA-VI-NEXT: buffer_store_dword v35, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill
3796+
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v39, 16, v1
3797+
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v37, 16, v0
37983798
; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v38, 0xffff, v1
37993799
; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v36, 0xffff, v0
38003800
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v29

llvm/test/CodeGen/AMDGPU/schedule-physregdeps.mir

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@
44
# CHECK: SU(0): $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec
55
# CHECK: Successors:
66
# CHECK-NEXT: SU(2): Out Latency=1
7+
# CHECK-NEXT: SU(4): Out Latency=1
78
# CHECK-NEXT: SU(2): Data Latency=1 Reg=$vgpr0
9+
# CHECK-NEXT: SU(4): Data Latency=1 Reg=$vgpr0_vgpr1
810
# CHECK: SU(1): $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec
911
# CHECK: Successors:
1012
# CHECK-NEXT: SU(3): Out Latency=1
13+
# CHECK-NEXT: SU(4): Out Latency=1
1114
# CHECK-NEXT: SU(3): Data Latency=1 Reg=$vgpr1
15+
# CHECK-NEXT: SU(4): Data Latency=1 Reg=$vgpr0_vgpr1
1216
# CHECK: SU(2): $vgpr0 = V_ADD_CO_U32_e32 $sgpr2, $vgpr0, implicit-def $vcc, implicit $exec
1317
# CHECK: Predecessors:
1418
# CHECK-NEXT: SU(0): Out Latency=1
@@ -18,6 +22,7 @@
1822
# CHECK-NEXT: SU(4): Data Latency=1 Reg=$vgpr0_vgpr1
1923
# CHECK-NEXT: SU(3): Out Latency=1
2024
# CHECK-NEXT: SU(3): Data Latency=1 Reg=$vcc
25+
# CHECK-NEXT: SU(4): Anti Latency=0
2126
# CHECK: SU(3): $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def dead $vcc, implicit $vcc, implicit $exec
2227
# CHECK: Predecessors:
2328
# CHECK-NEXT: SU(2): Out Latency=1
@@ -27,12 +32,19 @@
2732
# CHECK: Successors:
2833
# CHECK-NEXT: SU(4): Out Latency=1
2934
# CHECK-NEXT: SU(4): Data Latency=1 Reg=$vgpr0_vgpr1
35+
# CHECK-NEXT: SU(4): Anti Latency=0
3036
# CHECK: SU(4): $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
3137
# CHECK: Predecessors:
3238
# CHECK-NEXT: SU(3): Out Latency=1
3339
# CHECK-NEXT: SU(3): Data Latency=1 Reg=$vgpr0_vgpr1
40+
# CHECK-NEXT: SU(3): Anti Latency=0
3441
# CHECK-NEXT: SU(2): Out Latency=1
3542
# CHECK-NEXT: SU(2): Data Latency=1 Reg=$vgpr0_vgpr1
43+
# CHECK-NEXT: SU(2): Anti Latency=0
44+
# CHECK-NEXT: SU(1): Out Latency=1
45+
# CHECK-NEXT: SU(1): Data Latency=1 Reg=$vgpr0_vgpr1
46+
# CHECK-NEXT: SU(0): Out Latency=1
47+
# CHECK-NEXT: SU(0): Data Latency=1 Reg=$vgpr0_vgpr1
3648
# CHECK: Successors:
3749
# CHECK-NEXT: ExitSU: Ord Latency=3 Artificial
3850

0 commit comments

Comments
 (0)