Skip to content

[NVPTX] Make ctor/dtor lowering always enabled in NVPTX #126544

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -9193,6 +9193,8 @@ def err_cuda_device_exceptions : Error<
def err_dynamic_var_init : Error<
"dynamic initialization is not supported for "
"__device__, __constant__, __shared__, and __managed__ variables">;
def err_cuda_ctor_dtor_attrs
: Error<"CUDA does not support global %0 for __device__ functions">;
def err_shared_var_init : Error<
"initialization is not supported for __shared__ variables">;
def err_cuda_vla : Error<
Expand Down
19 changes: 4 additions & 15 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,9 +639,6 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(
Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ",")));

// Enable ctor / dtor lowering for the direct / freestanding NVPTX target.
CmdArgs.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});

// Add paths for the default clang library path.
SmallString<256> DefaultLibPath =
llvm::sys::path::parent_path(TC.getDriver().Dir);
Expand Down Expand Up @@ -726,9 +723,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
/// toolchain.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::Triple &HostTriple,
const ArgList &Args, bool Freestanding = false)
: ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
Freestanding(Freestanding) {
const ArgList &Args)
: ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
if (CudaInstallation.isValid())
getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
// Lookup binaries into the driver directory, this is used to
Expand All @@ -740,8 +736,7 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
/// system's default triple if not provided.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
/*Freestanding=*/true) {}
: NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {}

llvm::opt::DerivedArgList *
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
Expand Down Expand Up @@ -782,13 +777,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,

void NVPTXToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadingKind) const {
// If we are compiling with a standalone NVPTX toolchain we want to try to
// mimic a standard environment as much as possible. So we enable lowering
// ctor / dtor functions to global symbols that can be registered.
if (Freestanding && !getDriver().isUsingLTO())
CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
}
Action::OffloadKind DeviceOffloadingKind) const {}

bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
const Option &O = A->getOption();
Expand Down
7 changes: 2 additions & 5 deletions clang/lib/Driver/ToolChains/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ namespace toolchains {
class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
public:
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args,
bool Freestanding);
const llvm::Triple &HostTriple,
const llvm::opt::ArgList &Args);

NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
Expand Down Expand Up @@ -179,9 +179,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
protected:
Tool *buildAssembler() const override; // ptxas.
Tool *buildLinker() const override; // nvlink.

private:
bool Freestanding = false;
};

class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7484,6 +7484,15 @@ void Sema::ProcessDeclAttributeList(
}
}

// Do not permit 'constructor' or 'destructor' attributes on __device__ code.
if (getLangOpts().CUDAIsDevice && D->hasAttr<CUDADeviceAttr>() &&
(D->hasAttr<ConstructorAttr>() || D->hasAttr<DestructorAttr>()) &&
!getLangOpts().GPUAllowDeviceInit) {
Diag(D->getLocation(), diag::err_cuda_ctor_dtor_attrs)
<< (D->hasAttr<ConstructorAttr>() ? "constructors" : "destructors");
D->setInvalidDecl();
}

// Do this check after processing D's attributes because the attribute
// objc_method_family can change whether the given method is in the init
// family, and it can be applied after objc_designated_initializer. This is a
Expand Down
13 changes: 0 additions & 13 deletions clang/test/Driver/cuda-cross-compiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,19 +57,6 @@

// LINK: clang-nvlink-wrapper{{.*}}"-o" "a.out" "-arch" "sm_61"{{.*}}[[CUBIN:.+]].o

//
// Test to ensure that we enable handling global constructors in a freestanding
// Nvidia compilation.
//
// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 %s -### 2>&1 \
// RUN: | FileCheck -check-prefix=LOWERING %s
// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 -flto -c %s -### 2>&1 \
// RUN: | FileCheck -check-prefix=LOWERING-LTO %s

// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
// LOWERING: clang-nvlink-wrapper{{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
// LOWERING-LTO-NOT: "--nvptx-lower-global-ctor-dtor"

//
// Test passing arguments directly to nvlink.
//
Expand Down
9 changes: 9 additions & 0 deletions clang/test/SemaCUDA/device-var-init.cu
Original file line number Diff line number Diff line change
Expand Up @@ -485,3 +485,12 @@ void instantiate() {
bar<NontrivialInitializer><<<1, 1>>>();
// expected-note@-1 {{in instantiation of function template specialization 'bar<NontrivialInitializer>' requested here}}
}

__device__ void *ptr1 = nullptr;
__device__ void *ptr2 = ptr1;
// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}

__device__ [[gnu::constructor(101)]] void ctor() {}
// expected-error@-1 {{CUDA does not support global constructors for __device__ functions}}
__device__ [[gnu::destructor(101)]] void dtor() {}
// expected-error@-1 {{CUDA does not support global destructors for __device__ functions}}
4 changes: 1 addition & 3 deletions libc/cmake/modules/LLVMLibCTestRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -560,14 +560,12 @@ function(add_integration_test test_name)
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
-Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
"-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
-Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto -nostdlib -static
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
"-Wl,--suppress-stack-size-warning" -Wno-multi-gpu
"-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1"
"-Wl,-mllvm,-nvptx-emit-init-fini-kernel"
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
"--cuda-path=${LIBC_CUDA_ROOT}")
Expand Down
2 changes: 0 additions & 2 deletions libcxx/test/configs/nvptx-libc++-shared.cfg.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ config.substitutions.append(('%{link_flags}',
'-nostdlib++ -startfiles -stdlib '
'-L %{lib-dir} -lc++ -lc++abi '
'-Wl,--suppress-stack-size-warning '
'-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1 '
'-Wl,-mllvm,-nvptx-emit-init-fini-kernel'
))
config.substitutions.append(('%{exec}',
'%{executor} --no-parallelism'
Expand Down
21 changes: 0 additions & 21 deletions llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,6 @@

using namespace llvm;

static cl::opt<bool>
LowerCtorDtor("nvptx-lower-global-ctor-dtor",
cl::desc("Lower GPU ctor / dtors to globals on the device."),
cl::init(false), cl::Hidden);

#define DEPOTNAME "__local_depot"

/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
Expand Down Expand Up @@ -794,22 +789,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");

// OpenMP supports NVPTX global constructors and destructors.
bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;

if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
!LowerCtorDtor && !IsOpenMP) {
report_fatal_error(
"Module has a nontrivial global ctor, which NVPTX does not support.");
return true; // error
}
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
!LowerCtorDtor && !IsOpenMP) {
report_fatal_error(
"Module has a nontrivial global dtor, which NVPTX does not support.");
return true; // error
}

// We need to call the parent's one explicitly.
bool Result = AsmPrinter::doInitialization(M);

Expand Down
9 changes: 0 additions & 9 deletions llvm/test/CodeGen/NVPTX/global-ctor.ll

This file was deleted.

9 changes: 0 additions & 9 deletions llvm/test/CodeGen/NVPTX/global-dtor.ll

This file was deleted.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

; Make sure we get the same result if we run multiple times
; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
; RUN: llc -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY

@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
Expand Down