Skip to content

Commit a3248e4

Browse files
committed
[CUDA] Add getTargetFeatures for the NVPTX toolchain
The NVPTX toolchain uses target features to determine the PTX version to use. However, this isn't exposed externally like most other toolchain-specific target features are. Add this functionality in preparation for using it for OpenMP offloading. Reviewed By: jdoerfert, tra Differential Revision: https://reviews.llvm.org/D122089
1 parent 4275d7e commit a3248e4

File tree

3 files changed

+51
-26
lines changed

3 files changed

+51
-26
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,10 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
369369
case llvm::Triple::amdgcn:
370370
amdgpu::getAMDGPUTargetFeatures(D, Triple, Args, Features);
371371
break;
372+
case llvm::Triple::nvptx:
373+
case llvm::Triple::nvptx64:
374+
NVPTX::getNVPTXTargetFeatures(D, Triple, Args, Features);
375+
break;
372376
case llvm::Triple::m68k:
373377
m68k::getM68kTargetFeatures(D, Triple, Args, Features);
374378
break;

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,43 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
630630
Exec, CmdArgs, Inputs, Output));
631631
}
632632

633+
/// Appends a single "+ptxNN" target-feature string to \p Features, selected
/// from the CUDA version.
///
/// \param D, Triple, Args  Only used to construct a CudaInstallationDetector
///                         when \p CudaVersion is empty.
/// \param Features         Output list; exactly one entry is pushed onto it.
/// \param CudaVersion      CUDA version to map to a PTX feature. When empty,
///                         the version reported by the detector is used.
void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
634+
const llvm::opt::ArgList &Args,
635+
std::vector<StringRef> &Features,
636+
Optional<clang::CudaVersion> CudaVersion) {
637+
// No version supplied by the caller: take it from a detector built from
// D, Triple and Args.
if (!CudaVersion) {
638+
CudaInstallationDetector CudaInstallation(D, Triple, Args);
639+
CudaVersion = CudaInstallation.version();
640+
}
641+
642+
// New CUDA versions often introduce new instructions that are only supported
643+
// by newer PTX versions, so we need to raise the PTX level to enable them in
644+
// the NVPTX back-end.
645+
const char *PtxFeature = nullptr;
646+
switch (*CudaVersion) {
647+
#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
648+
case CudaVersion::CUDA_##CUDA_VER: \
649+
PtxFeature = "+ptx" #PTX_VER; \
650+
break;
651+
CASE_CUDA_VERSION(115, 75);
652+
CASE_CUDA_VERSION(114, 74);
653+
CASE_CUDA_VERSION(113, 73);
654+
CASE_CUDA_VERSION(112, 72);
655+
CASE_CUDA_VERSION(111, 71);
656+
CASE_CUDA_VERSION(110, 70);
657+
CASE_CUDA_VERSION(102, 65);
658+
CASE_CUDA_VERSION(101, 64);
659+
CASE_CUDA_VERSION(100, 63);
660+
CASE_CUDA_VERSION(92, 61);
661+
CASE_CUDA_VERSION(91, 61);
662+
CASE_CUDA_VERSION(90, 60);
663+
#undef CASE_CUDA_VERSION
664+
// Any version without an explicit case above falls back to "+ptx42".
default:
665+
PtxFeature = "+ptx42";
666+
}
667+
Features.push_back(PtxFeature);
668+
}
669+
633670
/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
634671
/// which isn't properly a linker but nonetheless performs the step of stitching
635672
/// together object files from the assembler into a single blob.
@@ -701,32 +738,11 @@ void CudaToolChain::addClangTargetOptions(
701738

702739
clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
703740

704-
// New CUDA versions often introduce new instructions that are only supported
705-
// by new PTX version, so we need to raise PTX level to enable them in NVPTX
706-
// back-end.
707-
const char *PtxFeature = nullptr;
708-
switch (CudaInstallationVersion) {
709-
#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
710-
case CudaVersion::CUDA_##CUDA_VER: \
711-
PtxFeature = "+ptx" #PTX_VER; \
712-
break;
713-
CASE_CUDA_VERSION(115, 75);
714-
CASE_CUDA_VERSION(114, 74);
715-
CASE_CUDA_VERSION(113, 73);
716-
CASE_CUDA_VERSION(112, 72);
717-
CASE_CUDA_VERSION(111, 71);
718-
CASE_CUDA_VERSION(110, 70);
719-
CASE_CUDA_VERSION(102, 65);
720-
CASE_CUDA_VERSION(101, 64);
721-
CASE_CUDA_VERSION(100, 63);
722-
CASE_CUDA_VERSION(92, 61);
723-
CASE_CUDA_VERSION(91, 61);
724-
CASE_CUDA_VERSION(90, 60);
725-
#undef CASE_CUDA_VERSION
726-
default:
727-
PtxFeature = "+ptx42";
728-
}
729-
CC1Args.append({"-target-feature", PtxFeature});
741+
std::vector<StringRef> Features;
742+
NVPTX::getNVPTXTargetFeatures(getDriver(), getTriple(), DriverArgs, Features,
743+
CudaInstallationVersion);
744+
for (StringRef PtxFeature : Features)
745+
CC1Args.append({"-target-feature", DriverArgs.MakeArgString(PtxFeature)});
730746
if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
731747
options::OPT_fno_cuda_short_ptr, false))
732748
CC1Args.append({"-mllvm", "--nvptx-short-ptr"});

clang/lib/Driver/ToolChains/Cuda.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@ class LLVM_LIBRARY_VISIBILITY OpenMPLinker : public Tool {
124124
const char *LinkingOutput) const override;
125125
};
126126

127+
/// Appends the "+ptxNN" target feature chosen for the CUDA version to
/// \p Features. \p CudaVersion defaults to None; in that case the definition
/// in Cuda.cpp takes the version from a CudaInstallationDetector constructed
/// from \p D, \p Triple and \p Args.
void getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
128+
const llvm::opt::ArgList &Args,
129+
std::vector<StringRef> &Features,
130+
Optional<clang::CudaVersion> CudaVersion = None);
131+
127132
} // end namespace NVPTX
128133
} // end namespace tools
129134

0 commit comments

Comments
 (0)