Skip to content

Commit 41cece8

Browse files
authored
[flang] Add -f[no-]vectorize flags (#119718)
This patch adds the -fvectorize and -fno-vectorize flags to flang. Note that this also changes the behaviour of `flang -fc1` to match that of `clang -cc1`: vectorization is only enabled when the `-vectorize-loops` flag is present. Additionally, this patch changes the behaviour of the default optimisation levels to match clang, so that vectorization happens only at the same levels as it does there. This patch is in draft while I write an RFC to discuss the above two changes.
1 parent a03f064 commit 41cece8

File tree

11 files changed

+75
-51
lines changed

11 files changed

+75
-51
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3997,11 +3997,15 @@ defm assumptions : BoolFOption<"assumptions",
39973997
"Disable codegen and compile-time checks for C++23's [[assume]] attribute">,
39983998
PosFlag<SetTrue>>;
39993999

4000+
4001+
let Visibility = [ClangOption, FlangOption] in {
40004002
def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>,
40014003
HelpText<"Enable the loop vectorization passes">;
40024004
def fno_vectorize : Flag<["-"], "fno-vectorize">, Group<f_Group>;
40034005
def : Flag<["-"], "ftree-vectorize">, Alias<fvectorize>;
40044006
def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>;
4007+
}
4008+
40054009
def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>,
40064010
HelpText<"Enable the superword-level parallelism vectorization passes">;
40074011
def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>;
@@ -7343,6 +7347,10 @@ def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">,
73437347
def mlink_bitcode_file
73447348
: Separate<["-"], "mlink-bitcode-file">,
73457349
HelpText<"Link the given bitcode file before performing optimizations.">;
7350+
7351+
def vectorize_loops : Flag<["-"], "vectorize-loops">,
7352+
HelpText<"Run the Loop vectorization passes">,
7353+
MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
73467354
} // let Visibility = [CC1Option, FC1Option]
73477355

73487356
let Visibility = [CC1Option] in {
@@ -7458,9 +7466,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt",
74587466
PosFlag<SetTrue, [], [ClangOption], "Link builtin bitcodes after the "
74597467
"optimization pipeline">,
74607468
NegFlag<SetFalse, [], [ClangOption]>>;
7461-
def vectorize_loops : Flag<["-"], "vectorize-loops">,
7462-
HelpText<"Run the Loop vectorization passes">,
7463-
MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
74647469
def vectorize_slp : Flag<["-"], "vectorize-slp">,
74657470
HelpText<"Run the SLP vectorization passes">,
74667471
MarshallingInfoFlag<CodeGenOpts<"VectorizeSLP">>;

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -511,39 +511,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
511511
}
512512
}
513513

514-
/// Vectorize at all optimization levels greater than 1 except for -Oz.
515-
/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
516-
/// enabled.
517-
static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
518-
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
519-
if (A->getOption().matches(options::OPT_O4) ||
520-
A->getOption().matches(options::OPT_Ofast))
521-
return true;
522-
523-
if (A->getOption().matches(options::OPT_O0))
524-
return false;
525-
526-
assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
527-
528-
// Vectorize -Os.
529-
StringRef S(A->getValue());
530-
if (S == "s")
531-
return true;
532-
533-
// Don't vectorize -Oz, unless it's the slp vectorizer.
534-
if (S == "z")
535-
return isSlpVec;
536-
537-
unsigned OptLevel = 0;
538-
if (S.getAsInteger(10, OptLevel))
539-
return false;
540-
541-
return OptLevel > 1;
542-
}
543-
544-
return false;
545-
}
546-
547514
/// Add -x lang to \p CmdArgs for \p Input.
548515
static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
549516
ArgStringList &CmdArgs) {

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3133,3 +3133,36 @@ void tools::renderCommonIntegerOverflowOptions(const ArgList &Args,
31333133
if (use_fwrapv_pointer)
31343134
CmdArgs.push_back("-fwrapv-pointer");
31353135
}
3136+
3137+
/// Vectorize at all optimization levels greater than 1 except for -Oz.
3138+
/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
3139+
/// enabled.
3140+
bool tools::shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
3141+
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
3142+
if (A->getOption().matches(options::OPT_O4) ||
3143+
A->getOption().matches(options::OPT_Ofast))
3144+
return true;
3145+
3146+
if (A->getOption().matches(options::OPT_O0))
3147+
return false;
3148+
3149+
assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");
3150+
3151+
// Vectorize -Os.
3152+
StringRef S(A->getValue());
3153+
if (S == "s")
3154+
return true;
3155+
3156+
// Don't vectorize -Oz, unless it's the slp vectorizer.
3157+
if (S == "z")
3158+
return isSlpVec;
3159+
3160+
unsigned OptLevel = 0;
3161+
if (S.getAsInteger(10, OptLevel))
3162+
return false;
3163+
3164+
return OptLevel > 1;
3165+
}
3166+
3167+
return false;
3168+
}

clang/lib/Driver/ToolChains/CommonArgs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ bool shouldRecordCommandLine(const ToolChain &TC,
265265
void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args,
266266
llvm::opt::ArgStringList &CmdArgs);
267267

268+
bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args,
269+
bool isSlpVec);
268270
} // end namespace tools
269271
} // end namespace driver
270272
} // end namespace clang

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,16 @@ void Flang::addCodegenOptions(const ArgList &Args,
149149
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
150150
CmdArgs.push_back("-fstack-arrays");
151151

152+
// Enable vectorization by default according to the optimization level
153+
// selected. For optimization levels that want vectorization we use the alias
154+
// option to simplify the hasFlag logic.
155+
bool enableVec = shouldEnableVectorizerAtOLevel(Args, false);
156+
OptSpecifier vectorizeAliasOption =
157+
enableVec ? options::OPT_O_Group : options::OPT_fvectorize;
158+
if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption,
159+
options::OPT_fno_vectorize, enableVec))
160+
CmdArgs.push_back("-vectorize-loops");
161+
152162
if (shouldLoopVersion(Args))
153163
CmdArgs.push_back("-fversion-loops-for-stride");
154164

flang/include/flang/Frontend/CodeGenOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the
3131
CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
3232
///< compile step.
3333
CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
34+
CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
3435
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
3536
CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
3637
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "clang/Basic/AllDiagnostics.h"
2424
#include "clang/Basic/DiagnosticDriver.h"
2525
#include "clang/Basic/DiagnosticOptions.h"
26+
#include "clang/Driver/Driver.h"
2627
#include "clang/Driver/DriverDiagnostic.h"
2728
#include "clang/Driver/OptionUtils.h"
2829
#include "clang/Driver/Options.h"
@@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
242243
clang::driver::options::OPT_fno_stack_arrays, false))
243244
opts.StackArrays = 1;
244245

246+
if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
247+
opts.VectorizeLoop = 1;
248+
245249
if (args.hasFlag(clang::driver::options::OPT_floop_versioning,
246250
clang::driver::options::OPT_fno_loop_versioning, false))
247251
opts.LoopVersioning = 1;

flang/lib/Frontend/FrontendActions.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
10371037
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
10381038
pto.LoopUnrolling = opts.UnrollLoops;
10391039
pto.LoopInterleaving = opts.UnrollLoops;
1040+
pto.LoopVectorization = opts.VectorizeLoop;
1041+
10401042
llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic);
10411043

10421044
// Attempt to load pass plugins and register their callbacks with PB.

flang/test/Driver/optimization-remark.f90

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,33 @@
55
! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1
66

77
! Check fc1 can handle -Rpass
8-
! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS
8+
! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS
99

1010
! Check that we can override -Rpass= with -Rno-pass.
11-
! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
11+
! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
1212

1313
! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted
14-
! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
15-
! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
16-
! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
14+
! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
15+
! RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
16+
! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
1717

1818
! Check valid -Rpass regex
19-
! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY
19+
! RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY
2020

2121
! Check valid -Rpass-missed regex
22-
! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY
22+
! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY
2323

2424
! Check valid -Rpass-analysis regex
25-
! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY
25+
! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY
2626

2727
! Check full -Rpass message is emitted
28-
! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS
28+
! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS
2929

3030
! Check full -Rpass-missed message is emitted
31-
! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED
31+
! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED
3232

3333
! Check full -Rpass-analysis message is emitted
34-
! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS
34+
! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS
3535

3636
! REMARKS: remark:
3737
! NO-REMARKS-NOT: remark:

flang/test/Integration/unroll-loops.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
! FIXME: https://github.com/llvm/llvm-project/issues/123668
22
!
33
! DEFINE: %{triple} =
4-
! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
5-
! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
4+
! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
5+
! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
66
!
77
! REDEFINE: %{triple} = aarch64-unknown-linux-gnu
88
! RUN: %if aarch64-registered-target %{ %{check-unroll} %}

flang/test/Lower/HLFIR/unroll-loops.fir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// DEFINE: %{triple} =
2-
// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
3-
// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
2+
// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
3+
// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
44

55
// REDEFINE: %{triple} = aarch64-unknown-linux-gnu
66
// RUN: %if aarch64-registered-target %{ %{check-unroll} %}

0 commit comments

Comments
 (0)