Skip to content

Commit b2a8d2c

Browse files
committed
[OpenMP] Avoid running openmp-opt on dead functions
The Attributor has logic to run only on assumed live functions and this is exposed to users now. OpenMP-opt will (mostly) ignore dead internal functions now but run the same deduction as before if an internal function is marked live. This should lower compile time as we run on less code and delete more code early on. For the full OpenMC module compiled with noinline and JITed at runtime, we save ~25%, or ~10s on my machine during JITing.
1 parent c3de9c1 commit b2a8d2c

File tree

6 files changed

+64
-65
lines changed

6 files changed

+64
-65
lines changed

llvm/include/llvm/Transforms/IPO/Attributor.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,12 +1406,13 @@ struct AttributorConfig {
14061406
bool RewriteSignatures = true;
14071407

14081408
/// Flag to determine if we want to initialize all default AAs for an internal
1409-
/// function marked live.
1410-
/// TODO: This should probably be a callback, or maybe
1411-
/// identifyDefaultAbstractAttributes should be virtual, something to allow
1412-
/// customizable lazy initialization for internal functions.
1409+
/// function marked live. See also: InitializationCallback.
14131410
bool DefaultInitializeLiveInternals = true;
14141411

1412+
/// Callback function to be invoked on internal functions marked live.
1413+
std::function<void(Attributor &A, const Function &F)> InitializationCallback =
1414+
nullptr;
1415+
14151416
/// Helper to update an underlying call graph and to delete functions.
14161417
CallGraphUpdater &CGUpdater;
14171418

@@ -1738,6 +1739,8 @@ struct Attributor {
17381739

17391740
if (Configuration.DefaultInitializeLiveInternals)
17401741
identifyDefaultAbstractAttributes(const_cast<Function &>(F));
1742+
if (Configuration.InitializationCallback)
1743+
Configuration.InitializationCallback(*this, F);
17411744
}
17421745

17431746
/// Helper function to remove callsite.

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2163,8 +2163,13 @@ struct OpenMPOpt {
21632163
void registerFoldRuntimeCall(RuntimeFunction RF);
21642164

21652165
/// Populate the Attributor with abstract attribute opportunities in the
2166-
/// function.
2166+
/// functions.
21672167
void registerAAs(bool IsModulePass);
2168+
2169+
public:
2170+
/// Callback to register AAs for live functions, including internal functions
2171+
/// marked live during the traversal.
2172+
static void registerAAsForFunction(Attributor &A, const Function &F);
21682173
};
21692174

21702175
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
@@ -4849,20 +4854,35 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
48494854
if (F->isDeclaration())
48504855
continue;
48514856

4852-
if (!DisableOpenMPOptDeglobalization)
4853-
A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(*F));
4854-
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
4855-
if (!DisableOpenMPOptDeglobalization)
4856-
A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
4857-
4858-
for (auto &I : instructions(*F)) {
4859-
if (auto *LI = dyn_cast<LoadInst>(&I)) {
4860-
bool UsedAssumedInformation = false;
4861-
A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
4862-
UsedAssumedInformation, AA::Interprocedural);
4863-
} else if (auto *SI = dyn_cast<StoreInst>(&I)) {
4864-
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
4865-
}
4857+
// We look at internal functions only on-demand but if any use is not a
4858+
// direct call or outside the current set of analyzed functions, we have
4859+
// to do it eagerly.
4860+
if (F->hasLocalLinkage()) {
4861+
if (llvm::all_of(F->uses(), [this](const Use &U) {
4862+
const auto *CB = dyn_cast<CallBase>(U.getUser());
4863+
return CB && CB->isCallee(&U) &&
4864+
A.isRunOn(const_cast<Function *>(CB->getCaller()));
4865+
}))
4866+
continue;
4867+
}
4868+
registerAAsForFunction(A, *F);
4869+
}
4870+
}
4871+
4872+
void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
4873+
if (!DisableOpenMPOptDeglobalization)
4874+
A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
4875+
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(F));
4876+
if (!DisableOpenMPOptDeglobalization)
4877+
A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(F));
4878+
4879+
for (auto &I : instructions(F)) {
4880+
if (auto *LI = dyn_cast<LoadInst>(&I)) {
4881+
bool UsedAssumedInformation = false;
4882+
A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
4883+
UsedAssumedInformation, AA::Interprocedural);
4884+
} else if (auto *SI = dyn_cast<StoreInst>(&I)) {
4885+
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
48664886
}
48674887
}
48684888
}
@@ -5033,10 +5053,13 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
50335053
}
50345054

50355055
// Look at every function in the Module unless it was internalized.
5056+
SetVector<Function *> Functions;
50365057
SmallVector<Function *, 16> SCC;
50375058
for (Function &F : M)
5038-
if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
5059+
if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) {
50395060
SCC.push_back(&F);
5061+
Functions.insert(&F);
5062+
}
50405063

50415064
if (SCC.empty())
50425065
return PreservedAnalyses::all();
@@ -5057,12 +5080,13 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
50575080

50585081
AttributorConfig AC(CGUpdater);
50595082
AC.DefaultInitializeLiveInternals = false;
5083+
AC.IsModulePass = true;
50605084
AC.RewriteSignatures = false;
50615085
AC.MaxFixpointIterations = MaxFixpointIterations;
50625086
AC.OREGetter = OREGetter;
50635087
AC.PassName = DEBUG_TYPE;
5088+
AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
50645089

5065-
SetVector<Function *> Functions;
50665090
Attributor A(Functions, InfoCache, AC);
50675091

50685092
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
@@ -5137,6 +5161,7 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
51375161
AC.MaxFixpointIterations = MaxFixpointIterations;
51385162
AC.OREGetter = OREGetter;
51395163
AC.PassName = DEBUG_TYPE;
5164+
AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
51405165

51415166
Attributor A(Functions, InfoCache, AC);
51425167

llvm/test/Transforms/Attributor/reduced/openmp_opt_dont_follow_gep_without_value.ll

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,6 @@ define weak_odr ptr @h(ptr %0) {
2929
; CHECK-NEXT: ret void
3030
;
3131
;
32-
; CHECK: Function Attrs: norecurse nounwind memory(none)
33-
; CHECK-LABEL: define {{[^@]+}}@g
34-
; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR0]] {
35-
; CHECK-NEXT: ret double 0.000000e+00
36-
;
37-
;
38-
; CHECK: Function Attrs: norecurse nosync nounwind memory(none)
39-
; CHECK-LABEL: define {{[^@]+}}@h.internalized
40-
; CHECK-SAME: (ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
41-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i64 undef
42-
; CHECK-NEXT: ret ptr [[TMP2]]
43-
;
44-
;
4532
; CHECK-LABEL: define {{[^@]+}}@h
4633
; CHECK-SAME: (ptr [[TMP0:%.*]]) {
4734
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP0]], align 4
@@ -50,7 +37,6 @@ define weak_odr ptr @h(ptr %0) {
5037
;
5138
;.
5239
; CHECK: attributes #[[ATTR0]] = { norecurse nounwind memory(none) }
53-
; CHECK: attributes #[[ATTR1]] = { norecurse nosync nounwind memory(none) }
5440
;.
5541
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
5642
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}

llvm/test/Transforms/OpenMP/global_constructor.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
3737

3838
declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
3939

40-
define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
40+
define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() {
4141
entry:
4242
%call.i = tail call double @__nv_log(double noundef 2.000000e+00) #1
4343
%call.i2 = tail call double @__nv_log(double noundef 2.000000e+00) #1
@@ -78,31 +78,29 @@ attributes #1 = { convergent nounwind }
7878
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
7979
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr {
8080
; CHECK-NEXT: entry:
81-
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]]
81+
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR1:[0-9]+]]
8282
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
8383
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
8484
; CHECK: common.ret:
8585
; CHECK-NEXT: ret void
8686
; CHECK: user_code.entry:
8787
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]]
88-
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]]
88+
; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]]
8989
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
9090
; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
9191
; CHECK: region.guarded:
9292
; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
9393
; CHECK-NEXT: br label [[REGION_BARRIER]]
9494
; CHECK: region.barrier:
95-
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
96-
; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR2]]
95+
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
96+
; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR1]]
9797
; CHECK-NEXT: br label [[COMMON_RET]]
9898
;
9999
;
100-
; CHECK: Function Attrs: norecurse
101-
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor
102-
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
100+
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor() {
103101
; CHECK-NEXT: entry:
104-
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]]
105-
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]]
102+
; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0:[0-9]+]]
103+
; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR0]]
106104
; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]]
107105
; CHECK-NEXT: store double [[DIV]], ptr @_ZL6Device, align 8, !tbaa [[TBAA11]]
108106
; CHECK-NEXT: ret void

llvm/test/Transforms/OpenMP/reduced_pointer_info_assertion.ll

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,6 @@ define fastcc void @rec(ptr %0, i64 %1) {
2929

3030
!0 = !{i32 7, !"openmp", i32 50}
3131
!1 = !{i32 7, !"openmp-device", i32 50}
32-
; MODULE-LABEL: define {{[^@]+}}@nblist
33-
; MODULE-SAME: () #[[ATTR0:[0-9]+]] {
34-
; MODULE-NEXT: [[TMP1:%.*]] = call ptr @alloc()
35-
; MODULE-NEXT: call fastcc void @rec.internalized(ptr [[TMP1]], i64 0)
36-
; MODULE-NEXT: ret i32 0
37-
;
38-
;
39-
; MODULE-LABEL: define {{[^@]+}}@rec.internalized
40-
; MODULE-SAME: (ptr nocapture writeonly [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
41-
; MODULE-NEXT: call fastcc void @rec.internalized(ptr nocapture writeonly [[TMP0]], i64 0) #[[ATTR2:[0-9]+]]
42-
; MODULE-NEXT: ret void
43-
;
44-
;
4532
; MODULE-LABEL: define {{[^@]+}}@rec
4633
; MODULE-SAME: (ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
4734
; MODULE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[TMP1]]

llvm/test/Transforms/OpenMP/single_threaded_execution.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ if.end:
5151
ret void
5252
}
5353

54+
; CHECK: [openmp-opt] Basic block @foo entry is executed by a single thread.
55+
; Function Attrs: noinline
56+
define internal void @foo() {
57+
entry:
58+
ret void
59+
}
60+
5461
; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
5562
; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread.
5663
; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
@@ -72,13 +79,6 @@ if.end:
7279
ret void
7380
}
7481

75-
; CHECK: [openmp-opt] Basic block @foo entry is executed by a single thread.
76-
; Function Attrs: noinline
77-
define internal void @foo() {
78-
entry:
79-
ret void
80-
}
81-
8282
; CHECK: [openmp-opt] Basic block @bar.internalized entry is executed by a single thread.
8383
; Function Attrs: noinline
8484
define void @bar() {

0 commit comments

Comments
 (0)