Skip to content

Commit d16ecad

Browse files
authored
[flang] Disable noalias by default (#142128)
With the noalias attributes enabled we see a 70% performance regression for exchange2_r on neoverse-v1 (aws graviton 3) using `-mcpu=native -Ofast -flto`. There is also a smaller regression on neoverse-v2. This appears to be because function specialization is no longer kicking in during LTO for digits_2. This can be seen in the output executable: previously it contained specialized copies of the function with names like `_QMbrute_forcePdigits_2.specialized.4`. Now there are no names like this. The bug is not in flang but in the function specialization pass; however, due to the size of the regression, I would like to request that noalias be disabled until function specialization has been fixed.
1 parent c66dbbe commit d16ecad

File tree

4 files changed

+12
-6
lines changed

4 files changed

+12
-6
lines changed

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
/// common to flang and the test tools.
1111

1212
#include "flang/Optimizer/Passes/Pipelines.h"
13+
#include "llvm/Support/CommandLine.h"
14+
15+
/// Force setting the no-alias attribute on function arguments when possible.
16+
static llvm::cl::opt<bool> forceNoAlias("force-no-alias", llvm::cl::Hidden,
17+
llvm::cl::init(false));
1318

1419
namespace fir {
1520

@@ -350,9 +355,10 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
350355
else
351356
framePointerKind = mlir::LLVM::framePointerKind::FramePointerKind::None;
352357

353-
bool setNoCapture = false, setNoAlias = false;
354-
if (config.OptLevel.isOptimizingForSpeed())
355-
setNoCapture = setNoAlias = true;
358+
// TODO: re-enable setNoAlias by default (when optimizing for speed) once
359+
// function specialization is fixed.
360+
bool setNoAlias = forceNoAlias;
361+
bool setNoCapture = config.OptLevel.isOptimizingForSpeed();
356362

357363
pm.addPass(fir::createFunctionAttr(
358364
{framePointerKind, config.InstrumentFunctionEntry,

flang/test/Fir/struct-passing-x86-64-byval.fir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Test X86-64 ABI rewrite of struct passed by value (BIND(C), VALUE derived types).
22
// This test tests cases where the struct must be passed on the stack according
33
// to the System V ABI.
4-
// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s
4+
// RUN: tco --target=x86_64-unknown-linux-gnu --force-no-alias %s | FileCheck %s
55

66
module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
77

flang/test/Fir/target-rewrite-complex-10-x86.fir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// Test COMPLEX(10) passing and returning on X86
22
// RUN: fir-opt --target-rewrite="target=x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=AMD64
3-
// RUN: tco -target="x86_64-unknown-linux-gnu" %s | FileCheck %s --check-prefix=AMD64_LLVM
3+
// RUN: tco -target="x86_64-unknown-linux-gnu" --force-no-alias %s | FileCheck %s --check-prefix=AMD64_LLVM
44

55
module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
66

flang/test/Fir/target.fir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: tco --target=i386-unknown-linux-gnu %s | FileCheck %s --check-prefix=I32
1+
// RUN: tco --target=i386-unknown-linux-gnu --force-no-alias %s | FileCheck %s --check-prefix=I32
22
// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s --check-prefix=X64
33
// RUN: tco --target=aarch64-unknown-linux-gnu %s | FileCheck %s --check-prefix=AARCH64
44
// RUN: tco --target=powerpc64le-unknown-linux-gnu %s | FileCheck %s --check-prefix=PPC

0 commit comments

Comments
 (0)