Skip to content

[flang] Add -f[no-]unroll-loops flag #122906

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4157,9 +4157,9 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group<f_Group>,
HelpText<"Issue call to specified function rather than a trap instruction">,
MarshallingInfoString<CodeGenOpts<"TrapFuncName">>;
def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option]>;
HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,
HelpText<"Turn off loop unroller">, Visibility<[ClangOption, CC1Option]>;
HelpText<"Turn off loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
def ffinite_loops: Flag<["-"], "ffinite-loops">, Group<f_Group>,
HelpText<"Assume all non-trivial loops are finite.">, Visibility<[ClangOption, CC1Option]>;
def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group<f_Group>,
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ void Flang::addCodegenOptions(const ArgList &Args,
options::OPT_flang_deprecated_no_hlfir,
options::OPT_fno_ppc_native_vec_elem_order,
options::OPT_fppc_native_vec_elem_order,
options::OPT_ftime_report, options::OPT_ftime_report_EQ});
options::OPT_ftime_report, options::OPT_ftime_report_EQ,
options::OPT_funroll_loops, options::OPT_fno_unroll_loops});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can add a forwarding test from the driver to the frontend driver.

}

void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Frontend/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
///< compile step.
CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass

CODEGENOPT(Underscoring, 1, 1)
Expand Down
4 changes: 4 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
clang::driver::options::OPT_fno_loop_versioning, false))
opts.LoopVersioning = 1;

opts.UnrollLoops = args.hasFlag(clang::driver::options::OPT_funroll_loops,
clang::driver::options::OPT_fno_unroll_loops,
(opts.OptimizationLevel > 1));

opts.AliasAnalysis = opts.OptimizationLevel > 0;

// -mframe-pointer=none/non-leaf/all option.
Expand Down
2 changes: 2 additions & 0 deletions flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
si.registerCallbacks(pic, &mam);
if (ci.isTimingEnabled())
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
pto.LoopUnrolling = opts.UnrollLoops;
pto.LoopInterleaving = opts.UnrollLoops;
llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic);

// Attempt to load pass plugins and register their callbacks with PB.
Expand Down
5 changes: 5 additions & 0 deletions flang/test/Driver/funroll-loops.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
! RUN: %flang -### -funroll-loops %s 2>&1 | FileCheck %s -check-prefix UNROLL
! RUN: %flang -### -fno-unroll-loops %s 2>&1 | FileCheck %s -check-prefix NO-UNROLL

! UNROLL: "-funroll-loops"
! NO-UNROLL: "-fno-unroll-loops"
39 changes: 39 additions & 0 deletions flang/test/HLFIR/unroll-loops.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
// RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
// RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
// RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL

// CHECK-LABEL: @unroll
// CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]])
// Test input: a function that stores each loop index (1 through 1000) into the
// corresponding element of the dummy array "a". The annotated comments inside
// the loop body describe the LLVM IR expected for the vectorized loop, both
// with and without loop unrolling.
func.func @unroll(%arg0: !fir.ref<!fir.array<1000 x index>> {fir.bindc_name = "a"}) {
%scope = fir.dummy_scope : !fir.dscope
%c1000 = arith.constant 1000 : index
%shape = fir.shape %c1000 : (index) -> !fir.shape<1>
// Declare the dummy argument with its static extent of 1000 elements.
%a:2 = hlfir.declare %arg0(%shape) dummy_scope %scope {uniq_name = "unrollEa"} : (!fir.ref<!fir.array<1000xindex>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<1000 x index>>, !fir.ref<!fir.array<1000 x index>>)
%c1 = arith.constant 1 : index
// Iterate from 1 to 1000 with a step of 1.
fir.do_loop %arg1 = %c1 to %c1000 step %c1 {
// Common prologue of the expected vector loop: a scalar induction phi starting
// at 0 and a <2 x i64> vector induction phi starting at <1, 2>.
// CHECK: br label %[[BLK:.*]]
// CHECK: [[BLK]]:
// CHECK-NEXT: %[[IND:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ]
// CHECK-NEXT: %[[VIND:.*]] = phi <2 x i64> [ <i64 1, i64 2>, %{{.*}} ], [ %[[NVIND:.*]], %[[BLK]] ]

// Expected body when unrolling is off: one <2 x i64> store per iteration and
// the scalar induction variable advances by 2.
// NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]]
// NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]]
// NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2
// NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2)

// Expected body when unrolling is on: two <2 x i64> stores per iteration (the
// second one 16 bytes after the first) and the scalar induction variable
// advances by 4.
// UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2)
// UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]]
// UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16
// UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP0]]
// UNROLL-NEXT: store <2 x i64> %[[VIND1]], ptr %[[GEP1]]
// UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4
// UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4)

// Common epilogue: leave the loop once the scalar induction value reaches 1000.
// CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000
// CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]
// Address element a(%arg1) and assign the loop index to it.
%ai = hlfir.designate %a#0 (%arg1) : (!fir.ref<!fir.array<1000 x index>>, index) -> !fir.ref<index>
hlfir.assign %arg1 to %ai : index, !fir.ref<index>
}
return
}
34 changes: 34 additions & 0 deletions flang/test/Integration/unroll-loops.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
! RUN: %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
! RUN: %flang_fc1 -emit-llvm -O2 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
! RUN: %flang_fc1 -emit-llvm -O1 -fno-unroll-loops -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
! RUN: %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL

! CHECK-LABEL: @unroll
! CHECK-SAME: (ptr nocapture writeonly %[[ARG0:.*]])
!> Store each index 1..1000 into the matching element of `a`.
!>
!> This subroutine is compiler test input. The annotated comment lines in the
!> body are FileCheck directives describing the LLVM IR expected for the loop
!> (vectorized with width 2, with and without unrolling); their text and order
!> must stay exactly as written. The loop is kept as an explicit `do` on
!> purpose, since it is the construct whose lowering is being checked.
!>
!> @param[out] a  64-bit integer array of 1000 elements; on return a(i) == i.
subroutine unroll(a)
  implicit none
  integer(kind=8), intent(out) :: a(1000)
  integer(kind=8) :: i
  ! CHECK: br label %[[BLK:.*]]
  ! CHECK: [[BLK]]:
  ! CHECK-NEXT: %[[IND:.*]] = phi i64 [ 0, %{{.*}} ], [ %[[NIV:.*]], %[[BLK]] ]
  ! CHECK-NEXT: %[[VIND:.*]] = phi <2 x i64> [ <i64 1, i64 2>, %{{.*}} ], [ %[[NVIND:.*]], %[[BLK]] ]
  !
  ! NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]]
  ! NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]]
  ! NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2
  ! NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2)
  !
  ! UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2)
  ! UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]]
  ! UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16
  ! UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP0]]
  ! UNROLL-NEXT: store <2 x i64> %[[VIND1]], ptr %[[GEP1]]
  ! UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4
  ! UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4)
  !
  ! CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000
  ! CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]
  do i=1,1000
    a(i) = i
  end do
end subroutine unroll
Loading