Skip to content

Commit 0364611

Browse files
jhuber6 authored and yuxuanchen1997 committed
[LLVM] Add 'ExpandVariadicsPass' to LTO default pipeline (#100479)
Summary: This pass expands variadic functions into non-variadic function calls according to the target ABI. Currently, this is used as the lowering for the NVPTX and AMDGPU targets. This pass is currently only run late in the target's backend. However, during LTO we want to run it before the inliner pass so that the expanded functions can be inlined using standard heuristics. This pass is a no-op for unsupported targets, so this won't apply to any code that isn't already using it. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250576
1 parent acf6d4e commit 0364611

File tree

3 files changed

+46
-0
lines changed

3 files changed

+46
-0
lines changed

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
4949
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
5050
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
51+
#include "llvm/Transforms/IPO/ExpandVariadics.h"
5152
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
5253
#include "llvm/Transforms/IPO/FunctionAttrs.h"
5354
#include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -1874,6 +1875,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
18741875
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
18751876
PTO.EagerlyInvalidateAnalyses));
18761877

1878+
// Lower variadic functions for supported targets prior to inlining.
1879+
MPM.addPass(ExpandVariadicsPass(ExpandVariadicsMode::Optimize));
1880+
18771881
// Note: historically, the PruneEH pass was run first to deduce nounwind and
18781882
// generally clean up exception handling overhead. It isn't clear this is
18791883
// valuable as the inliner doesn't currently care whether it is inlining an

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
; CHECK-O23SZ-NEXT: Running pass: InstCombinePass
7070
; CHECK-O23SZ-NEXT: Running pass: AggressiveInstCombinePass
7171
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
72+
; CHECK-O23SZ-NEXT: Running pass: ExpandVariadicsPass
7273
; CHECK-O23SZ-NEXT: Running pass: ModuleInlinerWrapperPass
7374
; CHECK-O23SZ-NEXT: Running analysis: InlineAdvisorAnalysis
7475
; CHECK-O23SZ-NEXT: Running pass: InlinerPass
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn-- -S -passes='lto<O2>' < %s | FileCheck %s
3+
target triple = "amdgcn-amd-amdhsa"
4+
5+
; We use the ExpandVariadics pass to lower variadic functions so they can be
6+
; inlined.
7+
8+
; Caller under test: passes 1, 2 and 3 as the variadic arguments of @vararg.
; The assertions below expect the 'lto<O2>' pipeline to reduce the whole body
; to a constant return of 6, i.e. the variadic call no longer remains.
define i32 @foo() {
9+
; CHECK-LABEL: define i32 @foo(
10+
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
11+
; CHECK-NEXT: [[ENTRY:.*:]]
12+
; CHECK-NEXT: ret i32 6
13+
;
14+
entry:
15+
; The fixed first argument is poison; @vararg never reads it.
%call = tail call i32 (i32, ...) @vararg(i32 poison, i32 noundef 1, i32 noundef 2, i32 noundef 3)
16+
ret i32 %call
17+
}
18+
19+
; Variadic callee: returns the sum of the first three i32 values read through
; the va_list. The fixed parameter %first is unused.
define internal i32 @vararg(i32 %first, ...) {
20+
entry:
21+
; Stack slot holding the va_list, which is treated as a single pointer here.
%vlist = alloca ptr, align 8
22+
call void @llvm.va_start.p0(ptr %vlist)
23+
; Load the initial argument pointer once; all three reads below are addressed
; relative to it at byte offsets 0, 4 and 8.
%vlist.promoted = load ptr, ptr %vlist, align 8
24+
%argp.a = getelementptr inbounds i8, ptr %vlist.promoted, i64 4
25+
; After each read the advanced pointer is written back to the va_list slot.
store ptr %argp.a, ptr %vlist, align 8
26+
%a = load i32, ptr %vlist.promoted, align 4
27+
%argp.b = getelementptr inbounds i8, ptr %vlist.promoted, i64 8
28+
store ptr %argp.b, ptr %vlist, align 8
29+
%b = load i32, ptr %argp.a, align 4
30+
%sum = add nsw i32 %b, %a
31+
%argp.c = getelementptr inbounds i8, ptr %vlist.promoted, i64 12
32+
store ptr %argp.c, ptr %vlist, align 8
33+
%c = load i32, ptr %argp.b, align 4
34+
; %ret = a + b + c
%ret = add nsw i32 %c, %sum
35+
call void @llvm.va_end.p0(ptr %vlist)
36+
ret i32 %ret
37+
}
38+
39+
declare void @llvm.va_start.p0(ptr)
40+
41+
declare void @llvm.va_end.p0(ptr)

0 commit comments

Comments
 (0)