Skip to content

Commit e760e85

Browse files
Ronak Chauhan
authored and committed
[llvm-objdump][AMDGPU] Detect CPU string
The AMDGPU ISA isn't backwards compatible, so -mcpu must always be specified during disassembly. However, the AMDGPU target CPU is stored in the e_flags field of the ELF object. This patch allows targets to implement CPU string detection, and implements it for AMDGPU by looking at e_flags.

Reviewed By: scott.linder

Differential Revision: https://reviews.llvm.org/D84519
1 parent dcd4589 commit e760e85

File tree

5 files changed

+203
-0
lines changed

5 files changed

+203
-0
lines changed

llvm/include/llvm/Object/ELFObjectFile.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ class ELFObjectFileBase : public ObjectFile {
8686

8787
SubtargetFeatures getRISCVFeatures() const;
8888

89+
/// Returns the target CPU name recorded in this ELF object when it can be
/// determined, or None otherwise.
Optional<StringRef> tryGetCPUName() const override;
90+
91+
/// Returns the AMDGPU processor name selected by the EF_AMDGPU_MACH bits of
/// the platform flags. Only valid for EM_AMDGPU objects.
StringRef getAMDGPUCPUName() const;
92+
8993
void setARMSubArch(Triple &TheTriple) const override;
9094

9195
virtual uint16_t getEType() const = 0;

llvm/include/llvm/Object/ObjectFile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ class ObjectFile : public SymbolicFile {
327327
virtual StringRef getFileFormatName() const = 0;
328328
virtual Triple::ArchType getArch() const = 0;
329329
virtual SubtargetFeatures getFeatures() const = 0;
330+
/// Returns the name of the CPU this object file targets, if the object format
/// records one. The default implementation reports that no name is available;
/// formats that can recover the CPU override this.
virtual Optional<StringRef> tryGetCPUName() const { return None; }
330331
virtual void setARMSubArch(Triple &TheTriple) const { }
331332
virtual Expected<uint64_t> getStartAddress() const {
332333
return errorCodeToError(object_error::parse_failed);

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,117 @@ SubtargetFeatures ELFObjectFileBase::getFeatures() const {
355355
}
356356
}
357357

358+
/// Attempts to recover the target CPU name from this ELF object.
///
/// Only EM_AMDGPU objects are handled; for those the name comes from
/// getAMDGPUCPUName(). Every other machine type yields None.
Optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
  if (getEMachine() == ELF::EM_AMDGPU)
    return getAMDGPUCPUName();

  return None;
}
366+
367+
/// Returns the AMDGPU processor name encoded in the EF_AMDGPU_MACH field of
/// this object's platform flags (for example "gfx900").
///
/// Must only be called on EM_AMDGPU objects (asserted). The flags are read
/// from the input file, so a machine value without a known processor name is
/// treated as unrecognized input rather than a programming error: an empty
/// StringRef is returned and the caller can treat it as "no CPU detected".
StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
  assert(getEMachine() == ELF::EM_AMDGPU);
  unsigned CPU = getPlatformFlags() & ELF::EF_AMDGPU_MACH;

  switch (CPU) {
  // Radeon HD 2000/3000 Series (R600).
  case ELF::EF_AMDGPU_MACH_R600_R600:
    return "r600";
  case ELF::EF_AMDGPU_MACH_R600_R630:
    return "r630";
  case ELF::EF_AMDGPU_MACH_R600_RS880:
    return "rs880";
  case ELF::EF_AMDGPU_MACH_R600_RV670:
    return "rv670";

  // Radeon HD 4000 Series (R700).
  case ELF::EF_AMDGPU_MACH_R600_RV710:
    return "rv710";
  case ELF::EF_AMDGPU_MACH_R600_RV730:
    return "rv730";
  case ELF::EF_AMDGPU_MACH_R600_RV770:
    return "rv770";

  // Radeon HD 5000 Series (Evergreen).
  case ELF::EF_AMDGPU_MACH_R600_CEDAR:
    return "cedar";
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:
    return "cypress";
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:
    return "juniper";
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:
    return "redwood";
  case ELF::EF_AMDGPU_MACH_R600_SUMO:
    return "sumo";

  // Radeon HD 6000 Series (Northern Islands).
  case ELF::EF_AMDGPU_MACH_R600_BARTS:
    return "barts";
  case ELF::EF_AMDGPU_MACH_R600_CAICOS:
    return "caicos";
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:
    return "cayman";
  case ELF::EF_AMDGPU_MACH_R600_TURKS:
    return "turks";

  // AMDGCN GFX6.
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600:
    return "gfx600";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601:
    return "gfx601";

  // AMDGCN GFX7.
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700:
    return "gfx700";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701:
    return "gfx701";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702:
    return "gfx702";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703:
    return "gfx703";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704:
    return "gfx704";

  // AMDGCN GFX8.
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801:
    return "gfx801";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802:
    return "gfx802";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803:
    return "gfx803";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810:
    return "gfx810";

  // AMDGCN GFX9.
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900:
    return "gfx900";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902:
    return "gfx902";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904:
    return "gfx904";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906:
    return "gfx906";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908:
    return "gfx908";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909:
    return "gfx909";

  // AMDGCN GFX10.
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
    return "gfx1010";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011:
    return "gfx1011";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012:
    return "gfx1012";
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030:
    return "gfx1030";

  default:
    // The machine value is file content, not an internal invariant, so an
    // unlisted value must not be marked unreachable (that would abort in
    // assert builds and be undefined behaviour otherwise). Report "unknown"
    // with an empty name instead.
    return StringRef();
  }
}
468+
358469
// FIXME Encode from a tablegen description or target parser.
359470
void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
360471
if (TheTriple.getSubArch() != Triple::NoSubArch)
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; Empty kernel used as disassembly input: each RUN block below compiles this
; file with llc for one -mcpu and runs llvm-objdump on the resulting object.
define amdgpu_kernel void @test_kernel() {
2+
ret void
3+
}
4+
5+
; Test subtarget detection. Disassembly is only supported for GFX8 and beyond.
6+
;
7+
; ----------------------------------GFX10--------------------------------------
8+
;
9+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=obj -O0 -o %t.o %s
10+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1030 %t.o > %t-specify.txt
11+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
12+
; RUN: diff %t-specify.txt %t-detect.txt
13+
14+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -filetype=obj -O0 -o %t.o %s
15+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1012 %t.o > %t-specify.txt
16+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
17+
; RUN: diff %t-specify.txt %t-detect.txt
18+
19+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -filetype=obj -O0 -o %t.o %s
20+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1011 %t.o > %t-specify.txt
21+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
22+
; RUN: diff %t-specify.txt %t-detect.txt
23+
24+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -O0 -o %t.o %s
25+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1010 %t.o > %t-specify.txt
26+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
27+
; RUN: diff %t-specify.txt %t-detect.txt
28+
29+
30+
; ----------------------------------GFX9---------------------------------------
31+
;
32+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -filetype=obj -O0 -o %t.o %s
33+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx909 %t.o > %t-specify.txt
34+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
35+
; RUN: diff %t-specify.txt %t-detect.txt
36+
37+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -O0 -o %t.o %s
38+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx908 %t.o > %t-specify.txt
39+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
40+
; RUN: diff %t-specify.txt %t-detect.txt
41+
42+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=obj -O0 -o %t.o %s
43+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx906 %t.o > %t-specify.txt
44+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
45+
; RUN: diff %t-specify.txt %t-detect.txt
46+
47+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -filetype=obj -O0 -o %t.o %s
48+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx904 %t.o > %t-specify.txt
49+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
50+
; RUN: diff %t-specify.txt %t-detect.txt
51+
52+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -filetype=obj -O0 -o %t.o %s
53+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx902 %t.o > %t-specify.txt
54+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
55+
; RUN: diff %t-specify.txt %t-detect.txt
56+
57+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -O0 -o %t.o %s
58+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx900 %t.o > %t-specify.txt
59+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
60+
; RUN: diff %t-specify.txt %t-detect.txt
61+
62+
63+
; ----------------------------------GFX8---------------------------------------
64+
;
65+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -filetype=obj -O0 -o %t.o %s
66+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx810 %t.o > %t-specify.txt
67+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
68+
; RUN: diff %t-specify.txt %t-detect.txt
69+
70+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -O0 -o %t.o %s
71+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx803 %t.o > %t-specify.txt
72+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
73+
; RUN: diff %t-specify.txt %t-detect.txt
74+
75+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -O0 -o %t.o %s
76+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx802 %t.o > %t-specify.txt
77+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
78+
; RUN: diff %t-specify.txt %t-detect.txt
79+
80+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -filetype=obj -O0 -o %t.o %s
81+
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx801 %t.o > %t-specify.txt
82+
; RUN: llvm-objdump -D %t.o > %t-detect.txt
83+
; RUN: diff %t-specify.txt %t-detect.txt

llvm/tools/llvm-objdump/llvm-objdump.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2170,6 +2170,10 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
21702170
if (!AsmInfo)
21712171
reportError(Obj->getFileName(),
21722172
"no assembly info for target " + TripleName);
2173+
2174+
if (MCPU.empty())
2175+
MCPU = Obj->tryGetCPUName().getValueOr("").str();
2176+
21732177
std::unique_ptr<const MCSubtargetInfo> STI(
21742178
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
21752179
if (!STI)

0 commit comments

Comments
 (0)