Skip to content

[NVPTX] Allow compiling LLVM-IR without -march set #79873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning<
InGroup<AVRRtlibLinkingQuirks>;
def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">;
def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: %1">;
def err_drv_offload_missing_gpu_arch : Error<
"Must pass in an explicit %0 gpu architecture to '%1'">;
def err_drv_no_cuda_installation : Error<
"cannot find CUDA installation; provide its path via '--cuda-path', or pass "
"'-nocudainc' to build without CUDA includes">;
Expand Down
9 changes: 7 additions & 2 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
// Define available target features
// These must be defined in sorted order!
NoAsmVariants = true;
GPU = CudaArch::SM_20;
GPU = CudaArch::UNUSED;

if (TargetPointerWidth == 32)
resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
Expand Down Expand Up @@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__PTX__");
Builder.defineMacro("__NVPTX__");

// Skip setting architecture dependent macros if undefined.
if (GPU == CudaArch::UNUSED && !HostTarget)
return;

if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
// Set __CUDA_ARCH__ for the GPU specified.
std::string CUDAArchCode = [this] {
Expand Down Expand Up @@ -220,10 +225,10 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::Generic:
case CudaArch::LAST:
break;
case CudaArch::UNUSED:
case CudaArch::UNKNOWN:
assert(false && "No GPU arch when compiling CUDA device code.");
return "";
case CudaArch::UNUSED:
case CudaArch::SM_20:
return "200";
case CudaArch::SM_21:
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Basic/Targets/NVPTX.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
StringRef CPU,
const std::vector<std::string> &FeaturesVec) const override {
Features[CudaArchToString(GPU)] = true;
if (GPU != CudaArch::UNUSED)
Features[CudaArchToString(GPU)] = true;
Features["ptx" + std::to_string(PTXVersion)] = true;
return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
}
Expand Down
20 changes: 14 additions & 6 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
GPUArchName = JA.getOffloadingArch();
} else {
GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
assert(!GPUArchName.empty() && "Must have an architecture passed in.");
if (GPUArchName.empty()) {
C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
<< getToolChain().getArchName() << getShortName();
return;
}
}

// Obtain architecture from the action.
Expand Down Expand Up @@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-v");

StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");
if (GPUArch.empty()) {
C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
<< getToolChain().getArchName() << getShortName();
return;
}

CmdArgs.push_back("-arch");
CmdArgs.push_back(Args.MakeArgString(GPUArch));
Expand Down Expand Up @@ -726,9 +734,8 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
llvm::opt::DerivedArgList *
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =
ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
Action::OffloadKind OffloadKind) const {
DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, OffloadKind);
if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());

Expand All @@ -738,7 +745,8 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
if (!llvm::is_contained(*DAL, A))
DAL->append(A);

if (!DAL->hasArg(options::OPT_march_EQ)) {
// TODO: We should accept 'generic' as a valid architecture.
if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
CudaArchToString(CudaArch::CudaDefault));
} else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
Expand Down
24 changes: 14 additions & 10 deletions clang/test/Driver/cuda-cross-compiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,6 @@

// LINK: nvlink{{.*}}"-o" "a.out" "-arch" "sm_61" {{.*}} "{{.*}}.cubin"

//
// Test the generated arguments default to a value with no architecture.
//
// RUN: %clang --target=nvptx64-nvidia-cuda -### --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix=DEFAULT %s

// DEFAULT: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.*}} "-target-cpu" "sm_52" "-target-feature" "+ptx{{[0-9]+}}" {{.*}} "-o" "[[PTX:.+]].s"
// DEFAULT-NEXT: ptxas{{.*}}"-m64" "-O0" "--gpu-name" "sm_52" "--output-file" "[[CUBIN:.+]].cubin" "[[PTX]].s" "-c"
// DEFAULT-NEXT: nvlink{{.*}}"-o" "a.out" "-arch" "sm_52" {{.*}} "[[CUBIN]].cubin"

//
// Test to ensure that we enable handling global constructors in a freestanding
// Nvidia compilation.
Expand All @@ -77,3 +67,17 @@
// RUN: | FileCheck -check-prefix=LOWERING %s

// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"

//
// Tests for handling a missing architecture.
//
// RUN: not %clang -target nvptx64-nvidia-cuda %s -### 2>&1 \
// RUN: | FileCheck -check-prefix=MISSING %s

// MISSING: error: Must pass in an explicit nvptx64 gpu architecture to 'ptxas'
// MISSING: error: Must pass in an explicit nvptx64 gpu architecture to 'nvlink'

// RUN: %clang -target nvptx64-nvidia-cuda -flto -c %s -### 2>&1 \
// RUN: | FileCheck -check-prefix=GENERIC %s

// GENERIC-NOT: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-target-cpu"
12 changes: 12 additions & 0 deletions clang/test/Preprocessor/predefined-arch-macros.c
Original file line number Diff line number Diff line change
Expand Up @@ -4292,6 +4292,18 @@
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ZVECTOR
// CHECK_SYSTEMZ_ZVECTOR: #define __VEC__ 10304

// Begin nvptx tests ----------------

// RUN: %clang -march=sm_75 -E -dM %s -o - 2>&1 \
// RUN: -target nvptx64-unknown-unknown \
// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_SM_75
// RUN: %clang -E -dM %s -o - 2>&1 \
// RUN: -target nvptx64-unknown-unknown \
// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_UNSET
// CHECK_ARCH_SM_75: #define __CUDA_ARCH__ 750
// CHECK_ARCH_UNSET-NOT: #define __CUDA_ARCH__
// CHECK_NVPTX: #define __NVPTX__ 1

// Begin amdgcn tests ----------------

// RUN: %clang -march=amdgcn -E -dM %s -o - 2>&1 \
Expand Down