@@ -358,6 +358,8 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
358
358
switch (getEMachine ()) {
359
359
case ELF::EM_AMDGPU:
360
360
return getAMDGPUCPUName ();
361
+ case ELF::EM_CUDA:
362
+ return getNVPTXCPUName ();
361
363
case ELF::EM_PPC:
362
364
case ELF::EM_PPC64:
363
365
return StringRef (" future" );
@@ -517,6 +519,73 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
517
519
}
518
520
}
519
521
522
+ StringRef ELFObjectFileBase::getNVPTXCPUName () const {
523
+ assert (getEMachine () == ELF::EM_CUDA);
524
+ unsigned SM = getPlatformFlags () & ELF::EF_CUDA_SM;
525
+
526
+ switch (SM) {
527
+ // Fermi architecture.
528
+ case ELF::EF_CUDA_SM20:
529
+ return " sm_20" ;
530
+ case ELF::EF_CUDA_SM21:
531
+ return " sm_21" ;
532
+
533
+ // Kepler architecture.
534
+ case ELF::EF_CUDA_SM30:
535
+ return " sm_30" ;
536
+ case ELF::EF_CUDA_SM32:
537
+ return " sm_32" ;
538
+ case ELF::EF_CUDA_SM35:
539
+ return " sm_35" ;
540
+ case ELF::EF_CUDA_SM37:
541
+ return " sm_37" ;
542
+
543
+ // Maxwell architecture.
544
+ case ELF::EF_CUDA_SM50:
545
+ return " sm_50" ;
546
+ case ELF::EF_CUDA_SM52:
547
+ return " sm_52" ;
548
+ case ELF::EF_CUDA_SM53:
549
+ return " sm_53" ;
550
+
551
+ // Pascal architecture.
552
+ case ELF::EF_CUDA_SM60:
553
+ return " sm_60" ;
554
+ case ELF::EF_CUDA_SM61:
555
+ return " sm_61" ;
556
+ case ELF::EF_CUDA_SM62:
557
+ return " sm_62" ;
558
+
559
+ // Volta architecture.
560
+ case ELF::EF_CUDA_SM70:
561
+ return " sm_70" ;
562
+ case ELF::EF_CUDA_SM72:
563
+ return " sm_72" ;
564
+
565
+ // Turing architecture.
566
+ case ELF::EF_CUDA_SM75:
567
+ return " sm_75" ;
568
+
569
+ // Ampere architecture.
570
+ case ELF::EF_CUDA_SM80:
571
+ return " sm_80" ;
572
+ case ELF::EF_CUDA_SM86:
573
+ return " sm_86" ;
574
+ case ELF::EF_CUDA_SM87:
575
+ return " sm_87" ;
576
+
577
+ // Ada architecture.
578
+ case ELF::EF_CUDA_SM89:
579
+ return " sm_89" ;
580
+
581
+ // Hopper architecture.
582
+ case ELF::EF_CUDA_SM90:
583
+ return getPlatformFlags () & ELF::EF_CUDA_ACCELERATORS ? " sm_90a" : " sm_90" ;
584
+ default :
585
+ llvm_unreachable (" Unknown EF_CUDA_SM value" );
586
+ }
587
+ }
588
+
520
589
// FIXME Encode from a tablegen description or target parser.
521
590
void ELFObjectFileBase::setARMSubArch (Triple &TheTriple) const {
522
591
if (TheTriple.getSubArch () != Triple::NoSubArch)
0 commit comments