Skip to content

[flang] Use fir.declare/fir.dummy_scope for TBAA tags attachments. #92472

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,17 @@ struct AliasAnalysis {
/// Source definition of a value.
SourceUnion u;

/// A value definition denoting the place where the corresponding
/// source variable was instantiated by the front-end.
/// Currently, it is the result of [hl]fir.declare of the source,
/// if we can reach it.
/// It helps to identify the scope where the corresponding variable
/// was defined in the original Fortran source, e.g. when MLIR
/// inlining happens an inlined fir.declare of the callee's
/// dummy argument identifies the scope where the source
/// may be treated as a dummy argument.
mlir::Value instantiationPoint;

/// Whether the source was reached following data or box reference
bool isData{false};
};
Expand Down Expand Up @@ -168,7 +179,10 @@ struct AliasAnalysis {
mlir::ModRefResult getModRef(mlir::Operation *op, mlir::Value location);

/// Return the memory source of a value.
Source getSource(mlir::Value);
/// If getInstantiationPoint is true, the search for the source
/// will stop at [hl]fir.declare if it represents a dummy
/// argument declaration (i.e. it has the dummy_scope operand).
Source getSource(mlir::Value, bool getInstantiationPoint = false);
};

inline bool operator==(const AliasAnalysis::Source::SourceOrigin &lhs,
Expand Down
14 changes: 14 additions & 0 deletions flang/include/flang/Optimizer/Analysis/TBAAForest.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,20 @@ class TBAAForrest {
}
return getFuncTree(func.getSymNameAttr());
}
// Returns the TBAA tree associated with the scope enclosed
// within the given function. With MLIR inlining, there may
// be multiple scopes within a single function. It is the caller's
// responsibility to provide unique name for the scope.
// If the scope string is empty, returns the TBAA tree for the
// "root" scope of the given function.
inline const TBAATree &getFuncTreeWithScope(mlir::func::FuncOp func,
llvm::StringRef scope) {
mlir::StringAttr name = func.getSymNameAttr();
if (!scope.empty())
name = mlir::StringAttr::get(name.getContext(),
llvm::Twine(name) + " - " + scope);
return getFuncTree(name);
}

private:
const TBAATree &getFuncTree(mlir::StringAttr symName) {
Expand Down
34 changes: 31 additions & 3 deletions flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ bool AliasAnalysis::Source::isRecordWithPointerComponent() const {
}

AliasResult AliasAnalysis::alias(Value lhs, Value rhs) {
// TODO: alias() has to be aware of the function scopes.
// After MLIR inlining, the current implementation may
// not recognize non-aliasing entities.
auto lhsSrc = getSource(lhs);
auto rhsSrc = getSource(rhs);
bool approximateSource = lhsSrc.approximateSource || rhsSrc.approximateSource;
Expand Down Expand Up @@ -232,7 +235,8 @@ getAttrsFromVariable(fir::FortranVariableOpInterface var) {
return attrs;
}

AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v) {
AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
bool getInstantiationPoint) {
auto *defOp = v.getDefiningOp();
SourceKind type{SourceKind::Unknown};
mlir::Type ty;
Expand All @@ -244,6 +248,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v) {
bool followingData = !isBoxRef;
mlir::SymbolRefAttr global;
Source::Attributes attributes;
mlir::Value instantiationPoint;
while (defOp && !breakFromLoop) {
ty = defOp->getResultTypes()[0];
llvm::TypeSwitch<Operation *>(defOp)
Expand Down Expand Up @@ -334,6 +339,21 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v) {
breakFromLoop = true;
return;
}
if (getInstantiationPoint) {
// Fetch only the innermost instantiation point.
if (!instantiationPoint)
instantiationPoint = op->getResult(0);

if (op.getDummyScope()) {
// Do not track past DeclareOp that has the dummy_scope
// operand. This DeclareOp is known to represent
// a dummy argument for some runtime instantiation
// of a procedure.
type = SourceKind::Argument;
breakFromLoop = true;
return;
}
}
// TODO: Look for the fortran attributes present on the operation
// Track further through the operand
v = op.getMemref();
Expand Down Expand Up @@ -372,9 +392,17 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v) {
}

if (type == SourceKind::Global) {
return {{global, followingData}, type, ty, attributes, approximateSource};
return {{global, instantiationPoint, followingData},
type,
ty,
attributes,
approximateSource};
}
return {{v, followingData}, type, ty, attributes, approximateSource};
return {{v, instantiationPoint, followingData},
type,
ty,
attributes,
approximateSource};
}

} // namespace fir
32 changes: 32 additions & 0 deletions flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,38 @@ TBAABuilder::TBAABuilder(MLIRContext *context, bool applyTBAA,
bool forceUnifiedTree)
: enableTBAA(applyTBAA && !disableTBAA),
trees(/*separatePerFunction=*/perFunctionTBAATrees && !forceUnifiedTree) {
// TODO: the TBAA tags created here are rooted in the root scope
// of the enclosing function. This does not work best with MLIR inlining.
// A better approach is to root them according to the scopes they belong to
// and that were used by AddAliasTagsPass to create TBAA tags before
// the CodeGen. For example:
// subroutine caller(a, b, ptr)
// real, target :: a(:), b(:)
// integer, pointer :: ptr(:)
// call callee(a, b, ptr)
// end
// subroutine callee(a, b, ptr)
// real :: a(:), b(:)
// integer, pointer :: ptr(:)
// do i=...
// a(ptr(i)) = b(ptr(i))
// end do
// end
//
// When callee is inlined, the dummy arguments 'a' and 'b' will
// be rooted in TBAA tree corresponding to the `call callee` call site,
// saying that the references to 'a' and 'b' cannot alias each other.
// These tags will be created by AddAliasTagsPass, but it will not be able
// to create any tags for 'ptr' references.
// During the CodeGen, we create 'any data access' tags for the
// 'ptr' acceses. If they are rooted within the root scope of `caller`,
// they end up in a different TBAA tree with the 'a' and 'b' access
// tags, so 'ptr', 'a' and 'b' references MayAlias. Moreover,
// the box access of 'ptr' will also be in a different TBAA tree
// with 'a' and 'b' tags, meaning they can also alias.
// This will prevent LLVM vectorization even with memory conflict checks.
// It seems that we'd better move all TBAA tags assignment to
// AddAliasTagsPass, which can at least rely on the dummy arguments scopes.
Comment on lines +85 to +86
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that box loads and stores are implicit in FIR. There is no operation to tag until after codegen.

I would be happy to see an end to this so that most uses of !fir.box<> become !fir.ref<!fir.box<>>. It makes IR a bit more verbose but I think it would make it a lot more obvious at a glance where we are generating unnecessary pointer nesting.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the box loads/stores for SSA boxes are implicit. Since the loads/stores only make problems after lowering to LLVM dialect, we can try to attack it from multiple directions:

  • A local box load/store may be well disambiguated by LLVM alias analysis (without any TBAA) if we make sure that LLVM sees that its address is not escaping. For example, this means that when an SSA box is passed to a function call, the resulting LLVM pointer argument has nocapture attribute.
  • The SSA boxes that are function arguments, are converted to pointer function arguments, but these pointer arguments, by construction, are know not to alias anything within the function scope, so the corresponding function arguments may all have noalias attribute.

Just these two approaches may resolve a lot of LLVM aliasing issues for the SSA boxes. If this is not enough we can also think about attaching TBAA tags to operations that produce unique SSA boxes (e.g. embox/rebox) and read from SSA boxes (e.g. box_addr), and then propagate these tags to the corresponding loads/stores in the CodeGen. I am not in favor of this, because we will have to add TBAA attribute to many more FIR operations.

I think nocapture and noalias should work for many cases of SSA boxes, and for the explicit loads/stores of the boxes we can attach TBAA descriptor member tags in AddAliasTagsPass (with proper scoping) and propagate them in the CodeGen.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your idea sounds good to me

if (!enableTBAA)
return;
}
Expand Down
9 changes: 8 additions & 1 deletion flang/lib/Optimizer/Dialect/FIROps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3910,8 +3910,15 @@ std::optional<std::int64_t> fir::getIntIfConstant(mlir::Value value) {

bool fir::isDummyArgument(mlir::Value v) {
auto blockArg{mlir::dyn_cast<mlir::BlockArgument>(v)};
if (!blockArg)
if (!blockArg) {
auto defOp = v.getDefiningOp();
if (defOp) {
if (auto declareOp = mlir::dyn_cast<fir::DeclareOp>(defOp))
if (declareOp.getDummyScope())
return true;
}
return false;
}

auto *owner{blockArg.getOwner()};
return owner->isEntryBlock() &&
Expand Down
101 changes: 94 additions & 7 deletions flang/lib/Optimizer/Transforms/AddAliasTags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "mlir/IR/Dominance.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
Expand Down Expand Up @@ -54,24 +56,85 @@ namespace {
/// Shared state per-module
class PassState {
public:
PassState(mlir::DominanceInfo &domInfo) : domInfo(domInfo) {}
/// memoised call to fir::AliasAnalysis::getSource
inline const fir::AliasAnalysis::Source &getSource(mlir::Value value) {
if (!analysisCache.contains(value))
analysisCache.insert({value, analysis.getSource(value)});
analysisCache.insert(
{value, analysis.getSource(value, /*getInstantiationPoint=*/true)});
return analysisCache[value];
}

/// get the per-function TBAATree for this function
inline const fir::TBAATree &getFuncTree(mlir::func::FuncOp func) {
return forrest[func];
}
inline const fir::TBAATree &getFuncTreeWithScope(mlir::func::FuncOp func,
fir::DummyScopeOp scope) {
auto &scopeMap = scopeNames.at(func);
return forrest.getFuncTreeWithScope(func, scopeMap.lookup(scope));
}

void processFunctionScopes(mlir::func::FuncOp func);
fir::DummyScopeOp getDeclarationScope(fir::DeclareOp declareOp);

private:
mlir::DominanceInfo &domInfo;
fir::AliasAnalysis analysis;
llvm::DenseMap<mlir::Value, fir::AliasAnalysis::Source> analysisCache;
fir::TBAAForrest forrest;
// Unique names for fir.dummy_scope operations within
// the given function.
llvm::DenseMap<mlir::func::FuncOp,
llvm::DenseMap<fir::DummyScopeOp, std::string>>
scopeNames;
// A map providing a vector of fir.dummy_scope operations
// for the given function. The vectors are sorted according
// to the dominance information.
llvm::DenseMap<mlir::func::FuncOp, llvm::SmallVector<fir::DummyScopeOp, 16>>
sortedScopeOperations;
};

// Process fir.dummy_scope operations in the given func:
// sort them according to the dominance information, and
// associate a unique (within the current function) scope name
// with each of them.
void PassState::processFunctionScopes(mlir::func::FuncOp func) {
if (scopeNames.contains(func))
return;

auto &scopeMap = scopeNames.getOrInsertDefault(func);
auto &scopeOps = sortedScopeOperations.getOrInsertDefault(func);
func.walk([&](fir::DummyScopeOp op) { scopeOps.push_back(op); });
llvm::stable_sort(scopeOps, [&](const fir::DummyScopeOp &op1,
const fir::DummyScopeOp &op2) {
return domInfo.properlyDominates(&*op1, &*op2);
});
unsigned scopeId = 0;
for (auto scope : scopeOps) {
if (scopeId != 0) {
std::string name = (llvm::Twine("Scope ") + llvm::Twine(scopeId)).str();
LLVM_DEBUG(llvm::dbgs() << "Creating scope '" << name << "':\n"
<< scope << "\n");
scopeMap.insert({scope, std::move(name)});
}
++scopeId;
}
}

// For the given fir.declare returns the dominating fir.dummy_scope
// operation.
fir::DummyScopeOp PassState::getDeclarationScope(fir::DeclareOp declareOp) {
auto func = declareOp->getParentOfType<mlir::func::FuncOp>();
assert(func && "fir.declare does not have parent func.func");
auto &scopeOps = sortedScopeOperations.at(func);
for (auto II = scopeOps.rbegin(), IE = scopeOps.rend(); II != IE; ++II) {
if (domInfo.dominates(&**II, &*declareOp))
return *II;
}
return nullptr;
}

class AddAliasTagsPass : public fir::impl::AddAliasTagsBase<AddAliasTagsPass> {
public:
void runOnOperation() override;
Expand All @@ -85,6 +148,9 @@ class AddAliasTagsPass : public fir::impl::AddAliasTagsBase<AddAliasTagsPass> {
} // namespace

static fir::DeclareOp getDeclareOp(mlir::Value arg) {
if (auto declare =
mlir::dyn_cast_or_null<fir::DeclareOp>(arg.getDefiningOp()))
return declare;
for (mlir::Operation *use : arg.getUsers())
if (fir::DeclareOp declare = mlir::dyn_cast<fir::DeclareOp>(use))
return declare;
Expand All @@ -94,7 +160,7 @@ static fir::DeclareOp getDeclareOp(mlir::Value arg) {
/// Get the name of a function argument using the "fir.bindc_name" attribute,
/// or ""
static std::string getFuncArgName(mlir::Value arg) {
// first try getting the name from the hlfir.declare
// first try getting the name from the fir.declare
if (fir::DeclareOp declare = getDeclareOp(arg))
return declare.getUniqName().str();

Expand Down Expand Up @@ -139,6 +205,23 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
return;
}

// Process the scopes, if not processed yet.
state.processFunctionScopes(func);

fir::DummyScopeOp scopeOp;
if (auto declVal = source.origin.instantiationPoint) {
// If the source is a dummy argument within some fir.dummy_scope,
// then find the corresponding innermost scope to be used for finding
// the right TBAA tree.
auto declareOp =
mlir::dyn_cast_or_null<fir::DeclareOp>(declVal.getDefiningOp());
assert(declareOp && "Instantiation point must be fir.declare");
if (auto dummyScope = declareOp.getDummyScope())
scopeOp = mlir::cast<fir::DummyScopeOp>(dummyScope.getDefiningOp());
if (!scopeOp)
scopeOp = state.getDeclarationScope(declareOp);
}

mlir::LLVM::TBAATagAttr tag;
// TBAA for dummy arguments
if (enableDummyArgs &&
Expand All @@ -147,7 +230,8 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
<< "Found reference to dummy argument at " << *op << "\n");
std::string name = getFuncArgName(source.origin.u.get<mlir::Value>());
if (!name.empty())
tag = state.getFuncTree(func).dummyArgDataTree.getTag(name);
tag = state.getFuncTreeWithScope(func, scopeOp)
.dummyArgDataTree.getTag(name);
else
LLVM_DEBUG(llvm::dbgs().indent(2)
<< "WARN: couldn't find a name for dummy argument " << *op
Expand All @@ -161,7 +245,7 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
const char *name = glbl.getRootReference().data();
LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to global " << name
<< " at " << *op << "\n");
tag = state.getFuncTree(func).globalDataTree.getTag(name);
tag = state.getFuncTreeWithScope(func, scopeOp).globalDataTree.getTag(name);

// TBAA for SourceKind::Direct
} else if (enableDirect &&
Expand All @@ -172,7 +256,8 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
const char *name = glbl.getRootReference().data();
LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to direct " << name
<< " at " << *op << "\n");
tag = state.getFuncTree(func).directDataTree.getTag(name);
tag =
state.getFuncTreeWithScope(func, scopeOp).directDataTree.getTag(name);
} else {
// SourceKind::Direct is likely to be extended to cases which are not a
// SymbolRefAttr in the future
Expand All @@ -193,7 +278,8 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
if (name) {
LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to allocation "
<< name << " at " << *op << "\n");
tag = state.getFuncTree(func).allocatedDataTree.getTag(*name);
tag = state.getFuncTreeWithScope(func, scopeOp)
.allocatedDataTree.getTag(*name);
} else {
LLVM_DEBUG(llvm::dbgs().indent(2)
<< "WARN: couldn't find a name for allocation " << *op
Expand All @@ -219,7 +305,8 @@ void AddAliasTagsPass::runOnOperation() {
// Instead this pass stores state per mlir::ModuleOp (which is what MLIR
// thinks the pass operates on), then the real work of the pass is done in
// runOnAliasInterface
PassState state;
auto &domInfo = getAnalysis<mlir::DominanceInfo>();
PassState state(domInfo);

mlir::ModuleOp mod = getOperation();
mod.walk(
Expand Down
9 changes: 7 additions & 2 deletions flang/lib/Optimizer/Transforms/AddDebugInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,13 @@ void AddDebugInfoPass::handleDeclareOp(fir::cg::XDeclareOp declOp,

// FIXME: There may be cases where an argument is processed a bit before
// DeclareOp is generated. In that case, DeclareOp may point to an
// intermediate op and not to BlockArgument. We need to find those cases and
// walk the chain to get to the actual argument.
// intermediate op and not to BlockArgument.
// Moreover, with MLIR inlining we cannot use the BlockArgument
// position to identify the original number of the dummy argument.
// If we want to keep running AddDebugInfoPass late, the dummy argument
// position in the argument list has to be expressed in FIR (e.g. as a
// constant attribute of [hl]fir.declare/fircg.ext_declare operation that has
// a dummy_scope operand).
unsigned argNo = 0;
if (fir::isDummyArgument(declOp.getMemref())) {
auto arg = llvm::cast<mlir::BlockArgument>(declOp.getMemref());
Expand Down
Loading
Loading