Skip to content

Commit a35a50c

Browse files
committed
[AMDGPU] Switch to MF.estimateFunctionSizeInBytes()
Both methods are equally inaccurate; we need to switch to MCExpr for better results in the future.
1 parent 8529bd7 commit a35a50c

File tree

4 files changed

+9
-32
lines changed

4 files changed

+9
-32
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8977,7 +8977,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
89778977
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), &ST);
89788978
}
89798979
default:
8980-
if (MI.isMetaInstruction())
8980+
if (MI.isMetaInstruction() || MI.isDebugInstr())
89818981
return 0;
89828982
return DescSize;
89838983
}

llvm/lib/Target/AMDGPU/SIProgramInfo.cpp

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -202,32 +202,9 @@ const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
202202
return MCConstantExpr::create(0, Ctx);
203203
}
204204

205-
uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF) {
206-
if (CodeSizeInBytes.has_value())
207-
return *CodeSizeInBytes;
205+
uint64_t SIProgramInfo::getFunctionCodeSize(MachineFunction &MF) {
206+
if (!CodeSizeInBytes.has_value())
207+
CodeSizeInBytes = MF.estimateFunctionSizeInBytes();
208208

209-
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
210-
const SIInstrInfo *TII = STM.getInstrInfo();
211-
212-
uint64_t CodeSize = 0;
213-
214-
for (const MachineBasicBlock &MBB : MF) {
215-
// The amount of padding to align code can be both underestimated and
216-
// overestimated. In case of inline asm used getInstSizeInBytes() will
217-
// return a maximum size of a single instruction, where the real size may
218-
// differ. At this point CodeSize may be already off.
219-
CodeSize = alignTo(CodeSize, MBB.getAlignment());
220-
221-
for (const MachineInstr &MI : MBB) {
222-
// TODO: CodeSize should account for multiple functions.
223-
224-
if (MI.isMetaInstruction())
225-
continue;
226-
227-
CodeSize += TII->getInstSizeInBytes(MI);
228-
}
229-
}
230-
231-
CodeSizeInBytes = CodeSize;
232-
return CodeSize;
209+
return *CodeSizeInBytes;
233210
}

llvm/lib/Target/AMDGPU/SIProgramInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ struct LLVM_EXTERNAL_VISIBILITY SIProgramInfo {
101101
void reset(const MachineFunction &MF);
102102

103103
// Get function code size and cache the value.
104-
uint64_t getFunctionCodeSize(const MachineFunction &MF);
104+
uint64_t getFunctionCodeSize(MachineFunction &MF);
105105

106106
/// Compute the value of the ComputePGMRsrc1 register.
107107
const MCExpr *getComputePGMRSrc1(const GCNSubtarget &ST,

llvm/test/CodeGen/AMDGPU/code-size-estimate.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ body: |
6161
# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf]
6262
# CHECK: .p2align 3
6363
# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
64-
# CHECK: ; codeLenInByte = 20
64+
# CHECK: ; codeLenInByte = 24
6565
---
6666
name: align8
6767
tracksRegLiveness: true
@@ -83,7 +83,7 @@ body: |
8383
# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf]
8484
# CHECK: .p2align 4
8585
# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
86-
# CHECK: ; codeLenInByte = 20
86+
# CHECK: ; codeLenInByte = 32
8787
---
8888
name: align16
8989
tracksRegLiveness: true
@@ -105,7 +105,7 @@ body: |
105105
# CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf]
106106
# CHECK: .p2align 5
107107
# CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
108-
# CHECK: ; codeLenInByte = 36
108+
# CHECK: ; codeLenInByte = 64
109109
---
110110
name: align32
111111
tracksRegLiveness: true

0 commit comments

Comments
 (0)