Skip to content

Commit 2921a09

Browse files
jle-quelldrumm
authored and committed
Make the argument -Xcuda-ptxas visible to the driver in cl-mode
It was also noticed that the arguments were being passed to ptxas twice. This has been fixed as well, by skipping arguments that are already present before appending them to the new DAL created by CudaToolChain::TranslateArgs. github:#86807
1 parent f6357bb commit 2921a09

File tree

3 files changed

+14
-2
lines changed

3 files changed

+14
-2
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,8 @@ def : Joined<["-"], "Xclang=">, Group<CompileOnly_Group>,
10031003
def Xcuda_fatbinary : Separate<["-"], "Xcuda-fatbinary">,
10041004
HelpText<"Pass <arg> to fatbinary invocation">, MetaVarName<"<arg>">;
10051005
def Xcuda_ptxas : Separate<["-"], "Xcuda-ptxas">,
1006-
HelpText<"Pass <arg> to the ptxas assembler">, MetaVarName<"<arg>">;
1006+
HelpText<"Pass <arg> to the ptxas assembler">, MetaVarName<"<arg>">,
1007+
Visibility<[ClangOption, CLOption]>;
10071008
def Xopenmp_target : Separate<["-"], "Xopenmp-target">, Group<CompileOnly_Group>,
10081009
HelpText<"Pass <arg> to the target offloading toolchain.">, MetaVarName<"<arg>">;
10091010
def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">, Group<CompileOnly_Group>,

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,10 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
990990
}
991991

992992
for (Arg *A : Args) {
993-
DAL->append(A);
993+
// Make sure flags are not duplicated.
994+
if (!llvm::is_contained(*DAL, A)) {
995+
DAL->append(A);
996+
}
994997
}
995998

996999
if (!BoundArch.empty()) {

clang/test/Driver/cuda-external-tools.cu

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@
8686
// RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
8787
// RUN: | FileCheck -check-prefixes=CHECK,SM35,PTXAS-EXTRA,FATBINARY-EXTRA %s
8888

89+
// Check -Xcuda-ptxas with clang-cl
90+
// RUN: %clang_cl -### -c -Xcuda-ptxas -foo1 \
91+
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
92+
// RUN: -Xcuda-ptxas -foo2 %s 2>&1 \
93+
// RUN: | FileCheck -check-prefixes=CHECK,SM35,PTXAS-EXTRA %s
94+
8995
// MacOS spot-checks
9096
// RUN: %clang -### --target=x86_64-apple-macosx -O0 -c %s 2>&1 \
9197
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
@@ -140,6 +146,8 @@
140146
// CHECK-SAME: "[[PTXFILE]]"
141147
// PTXAS-EXTRA-SAME: "-foo1"
142148
// PTXAS-EXTRA-SAME: "-foo2"
149+
// CHECK-NOT: "-foo1"
150+
// CHECK-NOT: "-foo2"
143151
// RDC-SAME: "-c"
144152
// CHECK-NOT: "-c"
145153

0 commit comments

Comments
 (0)