Skip to content

Commit af0b942

Browse files
author
Hugh Delaney
committed
Changing the default optimisation of ptxas to agree with ptxjitcompiler
1 parent 756c2e8 commit af0b942

File tree

2 files changed

+6
-9
lines changed

2 files changed

+6
-9
lines changed

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -422,9 +422,6 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
422422
CmdArgs.push_back("--return-at-end");
423423
} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
424424
// Map the -O we received to -O{0,1,2,3}.
425-
//
426-
// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
427-
// default, so it may correspond more closely to the spirit of clang -O2.
428425

429426
// -O3 seems like the least-bad option when -Osomething is specified to
430427
// clang but it isn't handled below.
@@ -446,9 +443,9 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
446443
}
447444
CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
448445
} else {
449-
// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
450-
// to no optimizations, but ptxas's default is -O3.
451-
CmdArgs.push_back("-O0");
446+
// If no -O was passed, pass -O3 to ptxas -- this makes ptxas's
447+
// optimization level the same as the ptxjitcompiler.
448+
CmdArgs.push_back("-O3");
452449
}
453450
if (DIKind == DebugDirectivesOnly)
454451
CmdArgs.push_back("-lineinfo");

clang/test/Driver/cuda-external-tools.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,10 @@
4040
// RUN: --no-cuda-noopt-device-debug -O2 -c %s 2>&1 \
4141
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT2 %s
4242

43-
// Regular compile without -O. This should result in us passing -O0 to ptxas.
43+
// Regular compile without -O. This should result in us passing -O3 to ptxas.
4444
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
4545
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
46-
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
46+
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
4747

4848
// Regular compiles with -Os and -Oz. For lack of a better option, we map
4949
// these to ptxas -O3.
@@ -75,7 +75,7 @@
7575
// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
7676
// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
7777
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
78-
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
78+
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
7979
// Check that we still pass -c when generating relocatable device code.
8080
// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
8181
// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \

0 commit comments

Comments
 (0)