Skip to content

Commit c6b0d11

Browse files
JonChesterfield authored and ronlieb committed
[AMDGPU] Implement variadic functions by IR lowering (llvm#93362)
This is a mostly-target-independent variadic function optimisation and lowering pass. It is only enabled for AMDGPU in this initial commit. The purpose is to make C style variadic functions a zero cost abstraction. They are lowered to equivalent IR which is then amenable to other optimisations. This is inherently slightly target specific but much less so than one might expect - the C varargs interface heavily constrains the ABI design divergence. The pass is primarily tested from webassembly. This is because wasm has a straightforward variadic lowering strategy which coincides exactly with what this pass transforms code into and a struct passing convention with few cases to check. Adding further target conventions is straightforward and elided from this patch primarily to simplify the review. Implemented in other branches are Linux X86, AMD64, AArch64 and NVPTX. Testing for targets that have existing lowering for va_arg from clang is most efficiently done by checking that clang | opt completely elides the variadic syntax from test cases. The lowering produces a struct for each call site which can be inspected to check that the various alignments and indirections are correct. AMDGPU presently has no variadic support other than some ad hoc printf handling. Combined with the pass being inactive on all other targets, landing this represents a strict increase in capability with zero risk. Testing and refining will continue post commit. In addition to the compiler tests included here, a self contained x64 clang/musl toolchain was constructed using the "lowering" instead of the systemv ABI and used to build various C programs like lua and libxml2. Change-Id: I82529bd8fe91edbe772c091e89796e4549316304
1 parent a08e6d4 commit c6b0d11

25 files changed

+4566
-20
lines changed

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,11 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
120120

121121
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
122122
QualType Ty) const {
123-
llvm_unreachable("AMDGPU does not support varargs");
123+
const bool IsIndirect = false;
124+
const bool AllowHigherAlign = false;
125+
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
126+
getContext().getTypeInfoInChars(Ty),
127+
CharUnits::fromQuantity(4), AllowHigherAlign);
124128
}
125129

126130
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {

clang/test/CodeGen/voidptr-vaarg.c

Lines changed: 478 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
2+
// REQUIRES: webassembly-registered-target
3+
4+
// Simple calls to known variadic functions that are completely elided when
5+
// optimisations are on. This is a functional check that the expand-variadics pass
6+
// is consistent with clang's va_arg handling
7+
8+
// When expand-variadics is added to the default pipeline, clang -O1 will
9+
// suffice here. -Wno-varargs avoids the warning: second argument to 'va_start' is not
10+
// the last named parameter
11+
12+
// RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -Wno-varargs -O1 -emit-llvm -o - | opt - -S --passes='module(expand-variadics,default<O1>)' --expand-variadics-override=optimize -o - | FileCheck %s
13+
14+
#include <stdarg.h>
15+
#include <stdint.h>
16+
17+
template <typename X, typename Y> static X first(...) {
18+
va_list va;
19+
__builtin_va_start(va, 0);
20+
X r = va_arg(va, X);
21+
va_end(va);
22+
return r;
23+
}
24+
25+
template <typename X, typename Y> static Y second(...) {
26+
va_list va;
27+
__builtin_va_start(va, 0);
28+
va_arg(va, X);
29+
Y r = va_arg(va, Y);
30+
va_end(va);
31+
return r;
32+
}
33+
34+
extern "C" {
35+
36+
// CHECK-LABEL: define {{[^@]+}}@first_pair_i32
37+
// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
38+
// CHECK-NEXT: entry:
39+
// CHECK-NEXT: ret i32 [[X]]
40+
//
41+
int first_pair_i32(int x, int y) { return first<int, int>(x, y); }
42+
43+
// CHECK-LABEL: define {{[^@]+}}@second_pair_i32
44+
// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
45+
// CHECK-NEXT: entry:
46+
// CHECK-NEXT: ret i32 [[Y]]
47+
//
48+
int second_pair_i32(int x, int y) { return second<int, int>(x, y); }
49+
50+
// CHECK-LABEL: define {{[^@]+}}@first_pair_f64
51+
// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]])
52+
// CHECK-NEXT: entry:
53+
// CHECK-NEXT: ret double [[X]]
54+
//
55+
double first_pair_f64(double x, double y) {
56+
return first<double, double>(x, y);
57+
}
58+
59+
// CHECK-LABEL: define {{[^@]+}}@second_pair_f64
60+
// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]])
61+
// CHECK-NEXT: entry:
62+
// CHECK-NEXT: ret double [[Y]]
63+
//
64+
double second_pair_f64(double x, double y) {
65+
return second<double, double>(x, y);
66+
}
67+
}
68+
69+
extern "C" {
70+
71+
// CHECK-LABEL: define {{[^@]+}}@first_i32_f64
72+
// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]])
73+
// CHECK-NEXT: entry:
74+
// CHECK-NEXT: ret i32 [[X]]
75+
//
76+
int first_i32_f64(int x, double y) { return first<int, double>(x, y); }
77+
78+
// CHECK-LABEL: define {{[^@]+}}@second_i32_f64
79+
// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]])
80+
// CHECK-NEXT: entry:
81+
// CHECK-NEXT: ret double [[Y]]
82+
//
83+
double second_i32_f64(int x, double y) { return second<int, double>(x, y); }
84+
85+
// CHECK-LABEL: define {{[^@]+}}@first_f64_i32
86+
// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
87+
// CHECK-NEXT: entry:
88+
// CHECK-NEXT: ret double [[X]]
89+
//
90+
double first_f64_i32(double x, int y) { return first<double, int>(x, y); }
91+
92+
// CHECK-LABEL: define {{[^@]+}}@second_f64_i32
93+
// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
94+
// CHECK-NEXT: entry:
95+
// CHECK-NEXT: ret i32 [[Y]]
96+
//
97+
int second_f64_i32(double x, int y) { return second<double, int>(x, y); }
98+
}
99+
100+
extern "C" {
101+
typedef uint64_t ulong2 __attribute__((__vector_size__(16), __aligned__(16)));
102+
103+
// CHECK-LABEL: define {{[^@]+}}@first_i32_ulong2
104+
// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
105+
// CHECK-NEXT: entry:
106+
// CHECK-NEXT: ret i32 [[X]]
107+
//
108+
int first_i32_ulong2(int x, ulong2 *y) { return first<int, ulong2>(x, *y); }
109+
110+
// CHECK-LABEL: define {{[^@]+}}@second_i32_ulong2
111+
// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
112+
// CHECK-NEXT: entry:
113+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
114+
// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
115+
// CHECK-NEXT: ret void
116+
//
117+
void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) {
118+
*r = second<int, ulong2>(x, *y);
119+
}
120+
121+
// CHECK-LABEL: define {{[^@]+}}@first_ulong2_i32
122+
// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
123+
// CHECK-NEXT: entry:
124+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[TBAA2]]
125+
// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
126+
// CHECK-NEXT: ret void
127+
//
128+
void first_ulong2_i32(ulong2 *x, int y, ulong2 *r) {
129+
*r = first<ulong2, int>(*x, y);
130+
}
131+
132+
// CHECK-LABEL: define {{[^@]+}}@second_ulong2_i32
133+
// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
134+
// CHECK-NEXT: entry:
135+
// CHECK-NEXT: ret i32 [[Y]]
136+
//
137+
int second_ulong2_i32(ulong2 *x, int y) { return second<ulong2, int>(*x, y); }
138+
}
139+
140+
// ascending alignment
141+
typedef struct {
142+
char c;
143+
short s;
144+
int i;
145+
long l;
146+
float f;
147+
double d;
148+
} asc;
149+
150+
extern "C" {
151+
152+
// CHECK-LABEL: define {{[^@]+}}@first_i32_asc
153+
// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
154+
// CHECK-NEXT: entry:
155+
// CHECK-NEXT: ret i32 [[X]]
156+
//
157+
int first_i32_asc(int x, asc *y) { return first<int, asc>(x, *y); }
158+
159+
// CHECK-LABEL: define {{[^@]+}}@second_i32_asc
160+
// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
161+
// CHECK-NEXT: entry:
162+
// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[Y]], i32 24, i1 false)
163+
// CHECK-NEXT: ret void
164+
//
165+
void second_i32_asc(int x, asc *y, asc *r) { *r = second<int, asc>(x, *y); }
166+
167+
// CHECK-LABEL: define {{[^@]+}}@first_asc_i32
168+
// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
169+
// CHECK-NEXT: entry:
170+
// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[X]], i32 24, i1 false)
171+
// CHECK-NEXT: ret void
172+
//
173+
void first_asc_i32(asc *x, int y, asc *r) { *r = first<asc, int>(*x, y); }
174+
175+
// CHECK-LABEL: define {{[^@]+}}@second_asc_i32
176+
// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
177+
// CHECK-NEXT: entry:
178+
// CHECK-NEXT: ret i32 [[Y]]
179+
//
180+
int second_asc_i32(asc *x, int y) { return second<asc, int>(*x, y); }
181+
}

llvm/include/llvm/IR/InstrTypes.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,6 +1836,15 @@ class CallBase : public Instruction {
18361836
return Attrs.getParamStackAlignment(ArgNo);
18371837
}
18381838

1839+
/// Extract the byref type for a call or parameter.
1840+
Type *getParamByRefType(unsigned ArgNo) const {
1841+
if (auto *Ty = Attrs.getParamByRefType(ArgNo))
1842+
return Ty;
1843+
if (const Function *F = getCalledFunction())
1844+
return F->getAttributes().getParamByRefType(ArgNo);
1845+
return nullptr;
1846+
}
1847+
18391848
/// Extract the byval type for a call or parameter.
18401849
Type *getParamByValType(unsigned ArgNo) const {
18411850
if (auto *Ty = Attrs.getParamByValType(ArgNo))

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
106106
void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
107107
void initializeExpandPostRAPass(PassRegistry&);
108108
void initializeExpandReductionsPass(PassRegistry&);
109+
void initializeExpandVariadicsPass(PassRegistry &);
109110
void initializeExpandVectorPredicationPass(PassRegistry &);
110111
void initializeExternalAAWrapperPassPass(PassRegistry&);
111112
void initializeFEntryInserterPass(PassRegistry&);
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//===- ExpandVariadics.h - expand variadic functions ------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
9+
#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
10+
11+
#include "llvm/IR/PassManager.h"
12+
13+
namespace llvm {
14+
15+
class Module;
16+
class ModulePass;
17+
class OptimizationLevel;
18+
19+
enum class ExpandVariadicsMode {
20+
Unspecified, // Use the implementation defaults
21+
Disable, // Disable the pass entirely
22+
Optimize, // Optimise without changing ABI
23+
Lowering, // Change variadic calling convention
24+
};
25+
26+
class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
27+
const ExpandVariadicsMode Mode;
28+
29+
public:
30+
// Operates under the passed mode unless overridden on the command line
31+
ExpandVariadicsPass(ExpandVariadicsMode Mode);
32+
33+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
34+
};
35+
36+
ModulePass *createExpandVariadicsPass(ExpandVariadicsMode);
37+
38+
} // end namespace llvm
39+
40+
#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@
130130
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
131131
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
132132
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
133+
#include "llvm/Transforms/IPO/ExpandVariadics.h"
133134
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
134135
#include "llvm/Transforms/IPO/FunctionAttrs.h"
135136
#include "llvm/Transforms/IPO/FunctionImport.h"

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ MODULE_PASS("dxil-upgrade", DXILUpgradePass())
6161
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
6262
MODULE_PASS("embed-bitcode", EmbedBitcodePass())
6363
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
64+
MODULE_PASS("expand-variadics", ExpandVariadicsPass(ExpandVariadicsMode::Disable))
6465
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
6566
MODULE_PASS("function-import", FunctionImportPass())
6667
MODULE_PASS("globalopt", GlobalOptPass())

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
5555
#include "llvm/Transforms/IPO.h"
5656
#include "llvm/Transforms/IPO/AlwaysInliner.h"
57+
#include "llvm/Transforms/IPO/ExpandVariadics.h"
5758
#include "llvm/Transforms/IPO/GlobalDCE.h"
5859
#include "llvm/Transforms/IPO/Internalize.h"
5960
#include "llvm/Transforms/Scalar.h"
@@ -1070,6 +1071,10 @@ void AMDGPUPassConfig::addIRPasses() {
10701071
if (isPassEnabled(EnableImageIntrinsicOptimizer))
10711072
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
10721073

1074+
// This can be disabled by passing ::Disable here or on the command line
1075+
// with --expand-variadics-override=disable.
1076+
addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
1077+
10731078
// Function calls are not supported, so make sure we inline everything.
10741079
addPass(createAMDGPUAlwaysInlinePass());
10751080
addPass(createAlwaysInlinerLegacyPass());

llvm/lib/Transforms/IPO/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
1212
DeadArgumentElimination.cpp
1313
ElimAvailExtern.cpp
1414
EmbedBitcodePass.cpp
15+
ExpandVariadics.cpp
1516
ExtractGV.cpp
1617
ForceFunctionAttrs.cpp
1718
FunctionAttrs.cpp

0 commit comments

Comments
 (0)