Skip to content

Commit 5f35e2c

Browse files
committed
[AMDGPU] Respect MBB alignment in the getFunctionCodeSize()
1 parent 6646b65 commit 5f35e2c

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

llvm/lib/Target/AMDGPU/SIProgramInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF) {
212212
uint64_t CodeSize = 0;
213213

214214
for (const MachineBasicBlock &MBB : MF) {
215+
// The amount of padding to align code can be either underestimated or
216+
// overestimated. When inline asm is used, getInstSizeInBytes() returns
217+
// the maximum size of a single instruction, while the real size may
218+
// differ, so CodeSize may already be off at this point.
219+
CodeSize = alignTo(CodeSize, MBB.getAlignment());
220+
215221
for (const MachineInstr &MI : MBB) {
216222
// TODO: CodeSize should account for multiple functions.
217223

llvm/test/CodeGen/AMDGPU/code-size-estimate.mir

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,92 @@ body: |
3131
3232
WAVE_BARRIER
3333
...
34+
35+
# CHECK: align4: ; @align4
36+
# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
37+
# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}} ; encoding: [A,A,0x85,0xbf]
38+
# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf]
39+
# CHECK: .p2align 2
40+
# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
41+
# CHECK: ; codeLenInByte = 16
42+
43+
---
44+
name: align4
45+
tracksRegLiveness: true
46+
body: |
47+
bb.0:
48+
$scc = IMPLICIT_DEF
49+
S_CBRANCH_SCC1 %bb.2, implicit $scc
50+
51+
bb.1:
52+
S_BARRIER
53+
54+
bb.2 (align 4):
55+
S_ENDPGM 0
56+
...
57+
58+
# CHECK: align8: ; @align8
59+
# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
60+
# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}} ; encoding: [A,A,0x85,0xbf]
61+
# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf]
62+
# CHECK: .p2align 3
63+
# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
64+
# CHECK: ; codeLenInByte = 20
65+
---
66+
name: align8
67+
tracksRegLiveness: true
68+
body: |
69+
bb.0:
70+
$scc = IMPLICIT_DEF
71+
S_CBRANCH_SCC1 %bb.2, implicit $scc
72+
73+
bb.1:
74+
S_BARRIER
75+
76+
bb.2 (align 8):
77+
S_ENDPGM 0
78+
...
79+
80+
# CHECK: align16: ; @align16
81+
# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
82+
# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}} ; encoding: [A,A,0x85,0xbf]
83+
# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf]
84+
# CHECK: .p2align 4
85+
# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
86+
# CHECK: ; codeLenInByte = 20
87+
---
88+
name: align16
89+
tracksRegLiveness: true
90+
body: |
91+
bb.0:
92+
$scc = IMPLICIT_DEF
93+
S_CBRANCH_SCC1 %bb.2, implicit $scc
94+
95+
bb.1:
96+
S_BARRIER
97+
98+
bb.2 (align 16):
99+
S_ENDPGM 0
100+
...
101+
102+
# CHECK: align32: ; @align32
103+
# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
104+
# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}} ; encoding: [A,A,0x85,0xbf]
105+
# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf]
106+
# CHECK: .p2align 5
107+
# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
108+
# CHECK: ; codeLenInByte = 36
109+
---
110+
name: align32
111+
tracksRegLiveness: true
112+
body: |
113+
bb.0:
114+
$scc = IMPLICIT_DEF
115+
S_CBRANCH_SCC1 %bb.2, implicit $scc
116+
117+
bb.1:
118+
S_BARRIER
119+
120+
bb.2 (align 32):
121+
S_ENDPGM 0
122+
...

0 commit comments

Comments
 (0)