Skip to content

Commit deab58d

Browse files
authored
[ELF] Add CPU name detection for CUDA architectures (#75964)
Summary: Recently we added support for detecting the CUDA processor with the ELF flags. This allows us to get a string representation of it in other code. This will be used by the offloading runtime.
1 parent 5641422 commit deab58d

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

llvm/include/llvm/Object/ELFObjectFile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ class ELFObjectFileBase : public ObjectFile {
6464
SubtargetFeatures getLoongArchFeatures() const;
6565

6666
StringRef getAMDGPUCPUName() const;
67+
StringRef getNVPTXCPUName() const;
6768

6869
protected:
6970
ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,8 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
358358
switch (getEMachine()) {
359359
case ELF::EM_AMDGPU:
360360
return getAMDGPUCPUName();
361+
case ELF::EM_CUDA:
362+
return getNVPTXCPUName();
361363
case ELF::EM_PPC:
362364
case ELF::EM_PPC64:
363365
return StringRef("future");
@@ -517,6 +519,73 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
517519
}
518520
}
519521

522+
/// Map the CUDA SM field of this object's ELF platform flags to its processor
/// name string (e.g. "sm_70").
///
/// Must only be called on objects whose machine type is ELF::EM_CUDA; this is
/// asserted. The SM value is obtained by masking the platform flags with
/// ELF::EF_CUDA_SM. An SM value that is not one of the entries handled below
/// reaches llvm_unreachable.
///
/// \returns the "sm_XX" name for the encoded SM value; for EF_CUDA_SM90 the
/// result is "sm_90a" when any ELF::EF_CUDA_ACCELERATORS bit is set in the
/// platform flags and "sm_90" otherwise.
StringRef ELFObjectFileBase::getNVPTXCPUName() const {
  assert(getEMachine() == ELF::EM_CUDA);

  const unsigned Flags = getPlatformFlags();
  const unsigned Arch = Flags & ELF::EF_CUDA_SM;

  // Hopper is the only entry whose name also depends on a second flag field,
  // so it is resolved up front instead of through the table.
  if (Arch == ELF::EF_CUDA_SM90) {
    if (Flags & ELF::EF_CUDA_ACCELERATORS)
      return "sm_90a";
    return "sm_90";
  }

  struct SMEntry {
    unsigned Flag;
    const char *Name;
  };
  static constexpr SMEntry KnownArchs[] = {
      // Fermi architecture.
      {ELF::EF_CUDA_SM20, "sm_20"},
      {ELF::EF_CUDA_SM21, "sm_21"},
      // Kepler architecture.
      {ELF::EF_CUDA_SM30, "sm_30"},
      {ELF::EF_CUDA_SM32, "sm_32"},
      {ELF::EF_CUDA_SM35, "sm_35"},
      {ELF::EF_CUDA_SM37, "sm_37"},
      // Maxwell architecture.
      {ELF::EF_CUDA_SM50, "sm_50"},
      {ELF::EF_CUDA_SM52, "sm_52"},
      {ELF::EF_CUDA_SM53, "sm_53"},
      // Pascal architecture.
      {ELF::EF_CUDA_SM60, "sm_60"},
      {ELF::EF_CUDA_SM61, "sm_61"},
      {ELF::EF_CUDA_SM62, "sm_62"},
      // Volta architecture.
      {ELF::EF_CUDA_SM70, "sm_70"},
      {ELF::EF_CUDA_SM72, "sm_72"},
      // Turing architecture.
      {ELF::EF_CUDA_SM75, "sm_75"},
      // Ampere architecture.
      {ELF::EF_CUDA_SM80, "sm_80"},
      {ELF::EF_CUDA_SM86, "sm_86"},
      {ELF::EF_CUDA_SM87, "sm_87"},
      // Ada architecture.
      {ELF::EF_CUDA_SM89, "sm_89"},
  };

  for (const SMEntry &Entry : KnownArchs)
    if (Entry.Flag == Arch)
      return Entry.Name;

  llvm_unreachable("Unknown EF_CUDA_SM value");
}
588+
520589
// FIXME Encode from a tablegen description or target parser.
521590
void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
522591
if (TheTriple.getSubArch() != Triple::NoSubArch)

0 commit comments

Comments
 (0)