Skip to content

Commit a361de6

Browse files
authored
[RDF] Create phi nodes for clobbering defs (llvm#123694)
When a def in block A reaches another block B that is in A's iterated dominance frontier, a phi node is added to B for the defined register. A clobbering def can be created at a call instruction for a register clobbered by the call. However, phi nodes are currently not created for a register when one of its reaching defs is a clobbering def. This patch adds phi nodes for registers that have a clobbering reaching def. These additional phis help in checking reaching defs for an instruction in RDF-based copy propagation and addressing-mode optimizations.
1 parent cfd8980 commit a361de6

File tree

4 files changed

+304
-10
lines changed

4 files changed

+304
-10
lines changed

llvm/include/llvm/CodeGen/RDFGraph.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -865,16 +865,17 @@ struct DataFlowGraph {
865865
using BlockRefsMap = RegisterAggrMap<NodeId>;
866866

867867
void buildStmt(Block BA, MachineInstr &In);
868-
void recordDefsForDF(BlockRefsMap &PhiM, Block BA);
869-
void buildPhis(BlockRefsMap &PhiM, Block BA);
868+
void recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &PhiClobberM, Block BA);
869+
void buildPhis(BlockRefsMap &PhiM, Block BA,
870+
const DefStackMap &DefM = DefStackMap());
870871
void removeUnusedPhis();
871872

872873
void pushClobbers(Instr IA, DefStackMap &DM);
873874
void pushDefs(Instr IA, DefStackMap &DM);
874875
template <typename T> void linkRefUp(Instr IA, NodeAddr<T> TA, DefStack &DS);
875876
template <typename Predicate>
876877
void linkStmtRefs(DefStackMap &DefM, Stmt SA, Predicate P);
877-
void linkBlockRefs(DefStackMap &DefM, Block BA);
878+
void linkBlockRefs(DefStackMap &DefM, BlockRefsMap &PhiClobberM, Block BA);
878879

879880
void unlinkUseDF(Use UA);
880881
void unlinkDefDF(Def DA);

llvm/lib/CodeGen/RDFGraph.cpp

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -966,15 +966,18 @@ void DataFlowGraph::build(const Config &config) {
966966

967967
// Build a map "PhiM" which will contain, for each block, the set
968968
// of references that will require phi definitions in that block.
969+
// "PhiClobberM" map contains references that require phis for clobbering defs
969970
BlockRefsMap PhiM(getPRI());
971+
BlockRefsMap PhiClobberM(getPRI());
970972
for (Block BA : Blocks)
971-
recordDefsForDF(PhiM, BA);
973+
recordDefsForDF(PhiM, PhiClobberM, BA);
972974
for (Block BA : Blocks)
973975
buildPhis(PhiM, BA);
974976

975977
// Link all the refs. This will recursively traverse the dominator tree.
978+
// Phis for clobbering defs are added here.
976979
DefStackMap DM;
977-
linkBlockRefs(DM, EA);
980+
linkBlockRefs(DM, PhiClobberM, EA);
978981

979982
// Finally, remove all unused phi nodes.
980983
if (!(BuildCfg.Options & BuildOptions::KeepDeadPhis))
@@ -1378,7 +1381,9 @@ void DataFlowGraph::buildStmt(Block BA, MachineInstr &In) {
13781381

13791382
// Scan all defs in the block node BA and record in PhiM the locations of
13801383
// phi nodes corresponding to these defs.
1381-
void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) {
1384+
// Clobbering defs in BA are recorded in PhiClobberM
1385+
void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
1386+
BlockRefsMap &PhiClobberM, Block BA) {
13821387
// Check all defs from block BA and record them in each block in BA's
13831388
// iterated dominance frontier. This information will later be used to
13841389
// create phi nodes.
@@ -1394,11 +1399,17 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) {
13941399
// This is done to make sure that each defined reference gets only one
13951400
// phi node, even if it is defined multiple times.
13961401
RegisterAggr Defs(getPRI());
1402+
RegisterAggr ClobberDefs(getPRI());
13971403
for (Instr IA : BA.Addr->members(*this)) {
13981404
for (Ref RA : IA.Addr->members_if(IsDef, *this)) {
13991405
RegisterRef RR = RA.Addr->getRegRef(*this);
1400-
if (RR.isReg() && isTracked(RR))
1406+
if (!isTracked(RR))
1407+
continue;
1408+
if (RR.isReg())
14011409
Defs.insert(RR);
1410+
// Clobbering def
1411+
else if (RR.isMask())
1412+
ClobberDefs.insert(RR);
14021413
}
14031414
}
14041415

@@ -1416,12 +1427,14 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) {
14161427
for (auto *DB : IDF) {
14171428
Block DBA = findBlock(DB);
14181429
PhiM[DBA.Id].insert(Defs);
1430+
PhiClobberM[DBA.Id].insert(ClobberDefs);
14191431
}
14201432
}
14211433

14221434
// Given the locations of phi nodes in the map PhiM, create the phi nodes
14231435
// that are located in the block node BA.
1424-
void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA) {
1436+
void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA,
1437+
const DefStackMap &DefM) {
14251438
// Check if this block has any DF defs, i.e. if there are any defs
14261439
// that this block is in the iterated dominance frontier of.
14271440
auto HasDF = PhiM.find(BA.Id);
@@ -1434,10 +1447,37 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA) {
14341447
for (MachineBasicBlock *PB : MBB->predecessors())
14351448
Preds.push_back(findBlock(PB));
14361449

1450+
RegisterAggr PhiDefs(getPRI());
1451+
// DefM will be non-empty when we are building phis
1452+
// for clobbering defs
1453+
if (!DefM.empty()) {
1454+
for (Instr IA : BA.Addr->members_if(IsPhi, *this)) {
1455+
for (Def DA : IA.Addr->members_if(IsDef, *this)) {
1456+
auto DR = DA.Addr->getRegRef(*this);
1457+
PhiDefs.insert(DR);
1458+
}
1459+
}
1460+
}
1461+
1462+
MachineRegisterInfo &MRI = MF.getRegInfo();
14371463
const RegisterAggr &Defs = PhiM[BA.Id];
14381464
uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
14391465

14401466
for (RegisterRef RR : Defs.refs()) {
1467+
if (!DefM.empty()) {
1468+
auto F = DefM.find(RR.Reg);
1469+
// Do not create a phi for unallocatable registers, or for registers
1470+
// that are never livein to BA.
1471+
// If a phi exists for RR, do not create another.
1472+
if (!MRI.isAllocatable(RR.Reg) || PhiDefs.hasCoverOf(RR) ||
1473+
F == DefM.end() || F->second.empty())
1474+
continue;
1475+
// Do not create a phi, if all reaching defs are clobbering
1476+
auto RDef = F->second.top();
1477+
if (RDef->Addr->getFlags() & NodeAttrs::Clobbering)
1478+
continue;
1479+
PhiDefs.insert(RR);
1480+
}
14411481
Phi PA = newPhi(BA);
14421482
PA.Addr->addMember(newDef(PA, RR, PhiFlags), *this);
14431483

@@ -1576,7 +1616,15 @@ void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, Stmt SA, Predicate P) {
15761616

15771617
// Create data-flow links for all instructions in the block node BA. This
15781618
// will include updating any phi nodes in BA.
1579-
void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, Block BA) {
1619+
void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, BlockRefsMap &PhiClobberM,
1620+
Block BA) {
1621+
// Create phi nodes for clobbering defs.
1622+
// Since a huge number of registers can get clobbered, it would result in many
1623+
// phi nodes being created in the graph. Only create phi nodes that have a non
1624+
// clobbering reaching def. Use DefM to get non-clobbering defs reaching a
1625+
// block.
1626+
buildPhis(PhiClobberM, BA, DefM);
1627+
15801628
// Push block delimiters.
15811629
markBlock(BA.Id, DefM);
15821630

@@ -1613,7 +1661,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, Block BA) {
16131661
for (auto *I : *N) {
16141662
MachineBasicBlock *SB = I->getBlock();
16151663
Block SBA = findBlock(SB);
1616-
linkBlockRefs(DefM, SBA);
1664+
linkBlockRefs(DefM, PhiClobberM, SBA);
16171665
}
16181666

16191667
// Link the phi uses from the successor blocks.
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# RUN: llc -march=hexagon -run-pass=hexagon-rdf-opt -hexagon-rdf-dump -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
2+
3+
# Check that RDF graph has a phi node for R28 register in bb.3 and bb.4
4+
# R28 is clobbered by memcpy call. The clobbering def must be present in bb.4's IDF
5+
# This phi node should prevent $r27 from being replaced by $r28 by RDF copy propagation
6+
7+
#CHECK-LABEL: Starting copy propagation on: foo
8+
9+
#CHECK-LABEL: --- %bb.3 ---
10+
#CHECK: p{{[0-9]+}}: phi [+d{{[0-9]+}}<R28>
11+
12+
#CHECK-LABEL: --- %bb.4 ---
13+
#CHECK: p{{[0-9]+}}: phi [+d{{[0-9]+}}<R28>
14+
15+
#CHECK-LABEL: After Hexagon RDF optimizations
16+
#CHECK-LABEL: bb.3:
17+
#CHECK: renamable $r0 = A2_add renamable $r27
18+
19+
--- |
20+
define internal fastcc void @foo() unnamed_addr {
21+
entry:
22+
ret void
23+
}
24+
25+
declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
26+
27+
---
28+
name: foo
29+
alignment: 16
30+
exposesReturnsTwice: false
31+
legalized: false
32+
regBankSelected: false
33+
selected: false
34+
failedISel: false
35+
tracksRegLiveness: true
36+
hasWinCFI: false
37+
callsEHReturn: false
38+
callsUnwindInit: false
39+
hasEHCatchret: false
40+
hasEHScopes: false
41+
hasEHFunclets: false
42+
isOutlined: false
43+
debugInstrRef: false
44+
failsVerification: false
45+
tracksDebugUserValues: true
46+
registers: []
47+
liveins:
48+
- { reg: '$d0', virtual-reg: '' }
49+
- { reg: '$d3', virtual-reg: '' }
50+
- { reg: '$r23', virtual-reg: '' }
51+
frameInfo:
52+
isFrameAddressTaken: false
53+
isReturnAddressTaken: false
54+
hasStackMap: false
55+
hasPatchPoint: false
56+
stackSize: 0
57+
offsetAdjustment: 0
58+
maxAlignment: 8
59+
adjustsStack: true
60+
hasCalls: true
61+
stackProtector: ''
62+
functionContext: ''
63+
maxCallFrameSize: 4294967295
64+
cvBytesOfCalleeSavedRegisters: 0
65+
hasOpaqueSPAdjustment: false
66+
hasVAStart: false
67+
hasMustTailInVarArgFunc: false
68+
hasTailCall: false
69+
isCalleeSavedInfoValid: false
70+
localFrameSize: 0
71+
savePoint: ''
72+
restorePoint: ''
73+
fixedStack:
74+
- { id: 0, type: default, offset: 40, size: 8, alignment: 8, stack-id: default,
75+
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
76+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
77+
stack:
78+
- { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
79+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
80+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
81+
- { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
82+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
83+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
84+
- { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
85+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
86+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
87+
- { id: 3, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8,
88+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
89+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
90+
entry_values: []
91+
callSites: []
92+
debugValueSubstitutions: []
93+
constants: []
94+
machineFunctionInfo: {}
95+
body: |
96+
bb.0.entry:
97+
successors: %bb.1
98+
liveins: $d0, $d3, $r23
99+
100+
J2_jump %bb.1, implicit-def dead $pc
101+
102+
bb.1:
103+
successors: %bb.2
104+
liveins: $d0:0x0000000000000003, $d3:0x0000000000000003, $r23
105+
106+
renamable $r28 = L2_loadri_io %fixed-stack.0, 0 :: (load (s32) from %fixed-stack.0)
107+
renamable $r27 = COPY killed renamable $r28
108+
109+
bb.2:
110+
successors: %bb.3
111+
liveins: $d0:0x0000000000000003, $d3:0x0000000000000003, $r23, $r27
112+
113+
renamable $d10 = L2_loadrd_io %stack.0, 0 :: (load (s64) from %stack.0)
114+
renamable $d11 = L2_loadrd_io %stack.1, 0 :: (load (s64) from %stack.1)
115+
116+
bb.3:
117+
successors: %bb.4, %bb.3
118+
liveins: $d0:0x0000000000000003, $d3:0x0000000000000003, $d10:0x0000000000000003, $d11:0x0000000000000002, $r23, $r27
119+
120+
ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
121+
renamable $r1 = A2_add renamable $r23, killed renamable $r0
122+
$r2 = COPY renamable $r22
123+
renamable $r0 = A2_add renamable $r27, killed renamable $r6
124+
J2_call &memcpy, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit $r1, implicit $r2, implicit-def $r29, implicit-def dead $r0
125+
renamable $p0 = C2_cmpgtp renamable $d11, renamable $d10
126+
ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
127+
J2_jumpt killed renamable $p0, %bb.3, implicit-def dead $pc
128+
J2_jump %bb.4, implicit-def dead $pc
129+
130+
bb.4:
131+
successors: %bb.5, %bb.2
132+
liveins: $d10:0x0000000000000003, $d11:0x0000000000000002, $r23, $r27
133+
134+
renamable $d0 = L2_loadrd_io %stack.2, 0 :: (load (s64) from %stack.2)
135+
renamable $d3 = L2_loadrd_io %stack.3, 0 :: (load (s64) from %stack.3)
136+
renamable $p0 = C2_cmpgtp killed renamable $d0, killed renamable $d3
137+
J2_jumpt killed renamable $p0, %bb.2, implicit-def dead $pc
138+
J2_jump %bb.5, implicit-def dead $pc
139+
140+
bb.5:
141+
PS_jmpret $r31, implicit-def dead $pc
142+
143+
...
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# RUN: llc -march=hexagon -run-pass=hexagon-rdf-opt \
2+
# RUN: -hexagon-rdf-dump -verify-machineinstrs -o /dev/null %s 2>&1 \
3+
# RUN: | FileCheck %s
4+
5+
# Check that phi nodes that only have clobbering reaching defs are not created
6+
# during graph construction. Check that there are no phi nodes for HVX registers
7+
8+
#CHECK-LABEL: --- %bb.1 ---
9+
#CHECK-NOT: p{{[0-9]+}}: phi [+d{{[0-9]+}}<V{{[0-9]+}}>
10+
11+
--- |
12+
@.str.3 = private unnamed_addr constant [2 x i8] c"%d", align 8
13+
@.str.4 = private unnamed_addr constant [2 x i8] c"%d", align 8
14+
15+
define internal fastcc void @foo() unnamed_addr {
16+
entry:
17+
ret void
18+
}
19+
20+
declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr
21+
22+
---
23+
name: foo
24+
alignment: 16
25+
exposesReturnsTwice: false
26+
legalized: false
27+
regBankSelected: false
28+
selected: false
29+
failedISel: false
30+
tracksRegLiveness: true
31+
hasWinCFI: false
32+
callsEHReturn: false
33+
callsUnwindInit: false
34+
hasEHCatchret: false
35+
hasEHScopes: false
36+
hasEHFunclets: false
37+
isOutlined: false
38+
debugInstrRef: false
39+
failsVerification: false
40+
tracksDebugUserValues: true
41+
registers: []
42+
liveins:
43+
- { reg: '$d0', virtual-reg: '' }
44+
- { reg: '$d3', virtual-reg: '' }
45+
- { reg: '$r23', virtual-reg: '' }
46+
frameInfo:
47+
isFrameAddressTaken: false
48+
isReturnAddressTaken: false
49+
hasStackMap: false
50+
hasPatchPoint: false
51+
stackSize: 0
52+
offsetAdjustment: 0
53+
maxAlignment: 8
54+
adjustsStack: true
55+
hasCalls: true
56+
stackProtector: ''
57+
functionContext: ''
58+
maxCallFrameSize: 4294967295
59+
cvBytesOfCalleeSavedRegisters: 0
60+
hasOpaqueSPAdjustment: false
61+
hasVAStart: false
62+
hasMustTailInVarArgFunc: false
63+
hasTailCall: false
64+
isCalleeSavedInfoValid: false
65+
localFrameSize: 0
66+
savePoint: ''
67+
restorePoint: ''
68+
entry_values: []
69+
callSites: []
70+
debugValueSubstitutions: []
71+
constants: []
72+
machineFunctionInfo: {}
73+
body: |
74+
bb.0.entry:
75+
successors: %bb.1
76+
liveins: $r25, $r26, $d11
77+
78+
renamable $r16 = A2_tfrsi 0
79+
S2_storerd_io $r29, 0, renamable $d11 :: (store (s64) into stack)
80+
$r0 = A2_tfrsi @.str.3
81+
J2_call @printf, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def dead $r0
82+
J2_jump %bb.1, implicit-def dead $pc
83+
84+
bb.1:
85+
successors: %bb.2, %bb.1
86+
liveins: $r16, $r25, $r26
87+
88+
S2_storeri_io $r29, 0, killed renamable $r25 :: (store (s32) into stack)
89+
$r0 = A2_tfrsi @.str.4
90+
S2_storeri_io $r29, 8, killed renamable $r26 :: (store (s64) into stack + 8)
91+
J2_call @printf, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def dead $r0
92+
renamable $p0 = C2_cmpgti renamable $r16, 4
93+
renamable $r16 = nsw A2_addi killed renamable $r16, 1
94+
J2_jumpf killed renamable $p0, %bb.2, implicit-def dead $pc
95+
J2_jump %bb.1, implicit-def dead $pc
96+
97+
bb.2:
98+
liveins: $r16, $r25, $r26
99+
100+
PS_jmpret $r31, implicit-def dead $pc
101+
102+
...

0 commit comments

Comments
 (0)