Skip to content

Commit d1a6eaa

Browse files
authored
[Attributor][NFC] Performance improvements (llvm#122923)
`forallInterferingAccesses` is a hotspot, and for large modules these changes make a measurable improvement in compilation time.

For LTO kernel compilation of 519.clvleaf (SPEChpc 2021) I measured the following:

```
                    | Measured times (s)     | Average | speedup
--------------------+------------------------+---------+---------
Baseline            | 33.268  33.332  33.275 | 33.292  | 0%
Cache "kernel"      | 30.543  30.339  30.607 | 30.496  | 9.2%
templatize callback | 30.981  30.97   30.964 | 30.972  | 7.5%
Both changes        | 29.284  29.201  29.053 | 29.179  | 14.1%
```
1 parent 63d3bd6 commit d1a6eaa

File tree

3 files changed

+23
-14
lines changed

3 files changed

+23
-14
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,12 @@ struct InformationCache {
12871287
return AG.getAnalysis<TargetLibraryAnalysis>(F);
12881288
}
12891289

1290+
/// Return true if \p F has the "kernel" function attribute
1291+
bool isKernel(const Function &F) {
1292+
FunctionInfo &FI = getFunctionInfo(F);
1293+
return FI.IsKernel;
1294+
}
1295+
12901296
/// Return true if \p Arg is involved in a must-tail call, thus the argument
12911297
/// of the caller or callee.
12921298
bool isInvolvedInMustTailCall(const Argument &Arg) {
@@ -1361,6 +1367,9 @@ struct InformationCache {
13611367

13621368
/// Function contains a `musttail` call.
13631369
bool ContainsMustTailCall;
1370+
1371+
/// Function has the `"kernel"` attribute
1372+
bool IsKernel;
13641373
};
13651374

13661375
/// A map type from functions to information about it.

llvm/lib/Transforms/IPO/Attributor.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -678,8 +678,8 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
678678
// kernel, values like allocas and shared memory are not accessible. We
679679
// implicitly check for this situation to avoid costly lookups.
680680
if (GoBackwardsCB && &ToFn != FromI.getFunction() &&
681-
!GoBackwardsCB(*FromI.getFunction()) && ToFn.hasFnAttribute("kernel") &&
682-
FromI.getFunction()->hasFnAttribute("kernel")) {
681+
!GoBackwardsCB(*FromI.getFunction()) && A.getInfoCache().isKernel(ToFn) &&
682+
A.getInfoCache().isKernel(*FromI.getFunction())) {
683683
LLVM_DEBUG(dbgs() << "[AA] assume kernel cannot be reached from within the "
684684
"module; success\n";);
685685
return false;
@@ -3191,6 +3191,8 @@ void InformationCache::initializeInformationCache(const Function &CF,
31913191
// initialize the cache eagerly which would look the same to the users.
31923192
Function &F = const_cast<Function &>(CF);
31933193

3194+
FI.IsKernel = F.hasFnAttribute("kernel");
3195+
31943196
// Walk all instructions to find interesting instructions that might be
31953197
// queried by abstract attributes during their initialization or update.
31963198
// This has to happen before we create attributes.

llvm/lib/Transforms/IPO/AttributorAttributes.cpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -885,9 +885,8 @@ struct AA::PointerInfo::State : public AbstractState {
885885
AAPointerInfo::OffsetInfo ReturnedOffsets;
886886

887887
/// See AAPointerInfo::forallInterferingAccesses.
888-
bool forallInterferingAccesses(
889-
AA::RangeTy Range,
890-
function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
888+
template <typename F>
889+
bool forallInterferingAccesses(AA::RangeTy Range, F CB) const {
891890
if (!isValidState() || !ReturnedOffsets.isUnassigned())
892891
return false;
893892

@@ -906,10 +905,9 @@ struct AA::PointerInfo::State : public AbstractState {
906905
}
907906

908907
/// See AAPointerInfo::forallInterferingAccesses.
909-
bool forallInterferingAccesses(
910-
Instruction &I,
911-
function_ref<bool(const AAPointerInfo::Access &, bool)> CB,
912-
AA::RangeTy &Range) const {
908+
template <typename F>
909+
bool forallInterferingAccesses(Instruction &I, F CB,
910+
AA::RangeTy &Range) const {
913911
if (!isValidState() || !ReturnedOffsets.isUnassigned())
914912
return false;
915913

@@ -1176,7 +1174,7 @@ struct AAPointerInfoImpl
11761174
// TODO: Use reaching kernels from AAKernelInfo (or move it to
11771175
// AAExecutionDomain) such that we allow scopes other than kernels as long
11781176
// as the reaching kernels are disjoint.
1179-
bool InstInKernel = Scope.hasFnAttribute("kernel");
1177+
bool InstInKernel = A.getInfoCache().isKernel(Scope);
11801178
bool ObjHasKernelLifetime = false;
11811179
const bool UseDominanceReasoning =
11821180
FindInterferingWrites && IsKnownNoRecurse;
@@ -1210,7 +1208,7 @@ struct AAPointerInfoImpl
12101208
// If the alloca containing function is not recursive the alloca
12111209
// must be dead in the callee.
12121210
const Function *AIFn = AI->getFunction();
1213-
ObjHasKernelLifetime = AIFn->hasFnAttribute("kernel");
1211+
ObjHasKernelLifetime = A.getInfoCache().isKernel(*AIFn);
12141212
bool IsKnownNoRecurse;
12151213
if (AA::hasAssumedIRAttr<Attribute::NoRecurse>(
12161214
A, this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL,
@@ -1222,8 +1220,8 @@ struct AAPointerInfoImpl
12221220
// as it is "dead" in the (unknown) callees.
12231221
ObjHasKernelLifetime = HasKernelLifetime(GV, *GV->getParent());
12241222
if (ObjHasKernelLifetime)
1225-
IsLiveInCalleeCB = [](const Function &Fn) {
1226-
return !Fn.hasFnAttribute("kernel");
1223+
IsLiveInCalleeCB = [&A](const Function &Fn) {
1224+
return !A.getInfoCache().isKernel(Fn);
12271225
};
12281226
}
12291227

@@ -1238,7 +1236,7 @@ struct AAPointerInfoImpl
12381236
// If the object has kernel lifetime we can ignore accesses only reachable
12391237
// by other kernels. For now we only skip accesses *in* other kernels.
12401238
if (InstInKernel && ObjHasKernelLifetime && !AccInSameScope &&
1241-
AccScope->hasFnAttribute("kernel"))
1239+
A.getInfoCache().isKernel(*AccScope))
12421240
return true;
12431241

12441242
if (Exact && Acc.isMustAccess() && Acc.getRemoteInst() != &I) {

0 commit comments

Comments
 (0)