Skip to content

Commit 6d00264

Browse files
[AMDGPU] Implement variadic functions by IR lowering
1 parent 560c2fd commit 6d00264

26 files changed

+4552
-24
lines changed

clang/lib/CodeGen/Targets/AMDGPU.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
4545

4646
ABIArgInfo classifyReturnType(QualType RetTy) const;
4747
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
48-
ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
48+
ABIArgInfo classifyArgumentType(QualType Ty, bool Variadic,
49+
unsigned &NumRegsLeft) const;
4950

5051
void computeInfo(CGFunctionInfo &FI) const override;
5152
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -103,19 +104,27 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
103104
if (!getCXXABI().classifyReturnType(FI))
104105
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
105106

107+
unsigned ArgumentIndex = 0;
108+
const unsigned numFixedArguments = FI.getNumRequiredArgs();
109+
106110
unsigned NumRegsLeft = MaxNumRegsForArgsRet;
107111
for (auto &Arg : FI.arguments()) {
108112
if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
109113
Arg.info = classifyKernelArgumentType(Arg.type);
110114
} else {
111-
Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
115+
bool FixedArgument = ArgumentIndex++ < numFixedArguments;
116+
Arg.info = classifyArgumentType(Arg.type, !FixedArgument, NumRegsLeft);
112117
}
113118
}
114119
}
115120

116121
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
117122
QualType Ty) const {
118-
llvm_unreachable("AMDGPU does not support varargs");
123+
const bool IsIndirect = false;
124+
const bool AllowHigherAlign = false;
125+
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
126+
getContext().getTypeInfoInChars(Ty),
127+
CharUnits::fromQuantity(4), AllowHigherAlign);
119128
}
120129

121130
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
@@ -197,12 +206,20 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
197206
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
198207
}
199208

200-
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
209+
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
201210
unsigned &NumRegsLeft) const {
202211
assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
203212

204213
Ty = useFirstFieldIfTransparentUnion(Ty);
205214

215+
if (Variadic) {
216+
return ABIArgInfo::getDirect(/*T=*/nullptr,
217+
/*Offset=*/0,
218+
/*Padding=*/nullptr,
219+
/*CanBeFlattened=*/false,
220+
/*Align=*/0);
221+
}
222+
206223
if (isAggregateTypeForABI(Ty)) {
207224
// Records with non-trivial destructors/copy-constructors should not be
208225
// passed by value.

clang/test/CodeGen/voidptr-vaarg.c

Lines changed: 478 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
2+
3+
// Simple calls to known variadic functions that are completely elided when
4+
// optimisations are on. This is a functional check that the expand-variadics pass
5+
// is consistent with clang's va_arg handling
6+
7+
// When expand-variadics is added to the default pipeline, clang -O1 will
8+
// suffice here. -Wno-varargs avoids the warning that the second argument to 'va_start' is not
9+
// the last named parameter
10+
11+
// RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -Wno-varargs -O1 -emit-llvm -o - | opt - -S --passes='module(expand-variadics,default<O1>)' --expand-variadics-override=optimize -o - | FileCheck %s
12+
13+
#include <stdarg.h>
14+
#include <stdint.h>
15+
16+
template <typename X, typename Y> static X first(...) {
17+
va_list va;
18+
__builtin_va_start(va, 0);
19+
X r = va_arg(va, X);
20+
va_end(va);
21+
return r;
22+
}
23+
24+
template <typename X, typename Y> static Y second(...) {
25+
va_list va;
26+
__builtin_va_start(va, 0);
27+
va_arg(va, X);
28+
Y r = va_arg(va, Y);
29+
va_end(va);
30+
return r;
31+
}
32+
33+
extern "C" {
34+
35+
// CHECK-LABEL: define {{[^@]+}}@first_pair_i32
36+
// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
37+
// CHECK-NEXT: entry:
38+
// CHECK-NEXT: ret i32 [[X]]
39+
//
40+
int first_pair_i32(int x, int y) { return first<int, int>(x, y); }
41+
42+
// CHECK-LABEL: define {{[^@]+}}@second_pair_i32
43+
// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
44+
// CHECK-NEXT: entry:
45+
// CHECK-NEXT: ret i32 [[Y]]
46+
//
47+
int second_pair_i32(int x, int y) { return second<int, int>(x, y); }
48+
49+
// CHECK-LABEL: define {{[^@]+}}@first_pair_f64
50+
// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]])
51+
// CHECK-NEXT: entry:
52+
// CHECK-NEXT: ret double [[X]]
53+
//
54+
double first_pair_f64(double x, double y) {
55+
return first<double, double>(x, y);
56+
}
57+
58+
// CHECK-LABEL: define {{[^@]+}}@second_pair_f64
59+
// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]])
60+
// CHECK-NEXT: entry:
61+
// CHECK-NEXT: ret double [[Y]]
62+
//
63+
double second_pair_f64(double x, double y) {
64+
return second<double, double>(x, y);
65+
}
66+
}
67+
68+
extern "C" {
69+
70+
// CHECK-LABEL: define {{[^@]+}}@first_i32_f64
71+
// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]])
72+
// CHECK-NEXT: entry:
73+
// CHECK-NEXT: ret i32 [[X]]
74+
//
75+
int first_i32_f64(int x, double y) { return first<int, double>(x, y); }
76+
77+
// CHECK-LABEL: define {{[^@]+}}@second_i32_f64
78+
// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]])
79+
// CHECK-NEXT: entry:
80+
// CHECK-NEXT: ret double [[Y]]
81+
//
82+
double second_i32_f64(int x, double y) { return second<int, double>(x, y); }
83+
84+
// CHECK-LABEL: define {{[^@]+}}@first_f64_i32
85+
// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
86+
// CHECK-NEXT: entry:
87+
// CHECK-NEXT: ret double [[X]]
88+
//
89+
double first_f64_i32(double x, int y) { return first<double, int>(x, y); }
90+
91+
// CHECK-LABEL: define {{[^@]+}}@second_f64_i32
92+
// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
93+
// CHECK-NEXT: entry:
94+
// CHECK-NEXT: ret i32 [[Y]]
95+
//
96+
int second_f64_i32(double x, int y) { return second<double, int>(x, y); }
97+
}
98+
99+
extern "C" {
100+
typedef uint64_t ulong2 __attribute__((__vector_size__(16), __aligned__(16)));
101+
102+
// CHECK-LABEL: define {{[^@]+}}@first_i32_ulong2
103+
// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
104+
// CHECK-NEXT: entry:
105+
// CHECK-NEXT: ret i32 [[X]]
106+
//
107+
int first_i32_ulong2(int x, ulong2 *y) { return first<int, ulong2>(x, *y); }
108+
109+
// CHECK-LABEL: define {{[^@]+}}@second_i32_ulong2
110+
// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
111+
// CHECK-NEXT: entry:
112+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
113+
// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
114+
// CHECK-NEXT: ret void
115+
//
116+
void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) {
117+
*r = second<int, ulong2>(x, *y);
118+
}
119+
120+
// CHECK-LABEL: define {{[^@]+}}@first_ulong2_i32
121+
// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
122+
// CHECK-NEXT: entry:
123+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[TBAA2]]
124+
// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
125+
// CHECK-NEXT: ret void
126+
//
127+
void first_ulong2_i32(ulong2 *x, int y, ulong2 *r) {
128+
*r = first<ulong2, int>(*x, y);
129+
}
130+
131+
// CHECK-LABEL: define {{[^@]+}}@second_ulong2_i32
132+
// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
133+
// CHECK-NEXT: entry:
134+
// CHECK-NEXT: ret i32 [[Y]]
135+
//
136+
int second_ulong2_i32(ulong2 *x, int y) { return second<ulong2, int>(*x, y); }
137+
}
138+
139+
// ascending alignment
140+
typedef struct {
141+
char c;
142+
short s;
143+
int i;
144+
long l;
145+
float f;
146+
double d;
147+
} asc;
148+
149+
extern "C" {
150+
151+
// CHECK-LABEL: define {{[^@]+}}@first_i32_asc
152+
// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
153+
// CHECK-NEXT: entry:
154+
// CHECK-NEXT: ret i32 [[X]]
155+
//
156+
int first_i32_asc(int x, asc *y) { return first<int, asc>(x, *y); }
157+
158+
// CHECK-LABEL: define {{[^@]+}}@second_i32_asc
159+
// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
160+
// CHECK-NEXT: entry:
161+
// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[Y]], i32 24, i1 false)
162+
// CHECK-NEXT: ret void
163+
//
164+
void second_i32_asc(int x, asc *y, asc *r) { *r = second<int, asc>(x, *y); }
165+
166+
// CHECK-LABEL: define {{[^@]+}}@first_asc_i32
167+
// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
168+
// CHECK-NEXT: entry:
169+
// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[X]], i32 24, i1 false)
170+
// CHECK-NEXT: ret void
171+
//
172+
void first_asc_i32(asc *x, int y, asc *r) { *r = first<asc, int>(*x, y); }
173+
174+
// CHECK-LABEL: define {{[^@]+}}@second_asc_i32
175+
// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
176+
// CHECK-NEXT: entry:
177+
// CHECK-NEXT: ret i32 [[Y]]
178+
//
179+
int second_asc_i32(asc *x, int y) { return second<asc, int>(*x, y); }
180+
}

llvm/cmake/modules/HandleLLVMOptions.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1233,7 +1233,7 @@ endif()
12331233
option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off)
12341234
option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF)
12351235
mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE)
1236-
append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\" -fcoverage-mapping"
1236+
append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\" -fcoverage-mapping -fcoverage-mcdc"
12371237
CMAKE_CXX_FLAGS
12381238
CMAKE_C_FLAGS
12391239
CMAKE_EXE_LINKER_FLAGS

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
106106
void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
107107
void initializeExpandPostRAPass(PassRegistry&);
108108
void initializeExpandReductionsPass(PassRegistry&);
109+
void initializeExpandVariadicsPass(PassRegistry &);
109110
void initializeExpandVectorPredicationPass(PassRegistry &);
110111
void initializeExternalAAWrapperPassPass(PassRegistry&);
111112
void initializeFEntryInserterPass(PassRegistry&);
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
//===- ExpandVariadics.h - expand variadic functions ------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
9+
#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
10+
11+
#include "llvm/IR/PassManager.h"
12+
13+
namespace llvm {
14+
15+
class Module;
16+
class ModulePass;
17+
class OptimizationLevel;
18+
19+
enum class ExpandVariadicsMode {
20+
Unspecified, // Use the implementation defaults
21+
Disable, // Disable the pass entirely
22+
Optimize, // Optimise without changing ABI
23+
Lowering, // Change variadic calling convention
24+
};
25+
26+
class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
27+
const ExpandVariadicsMode Mode;
28+
29+
public:
30+
// Operates under passed mode unless overridden on commandline
31+
ExpandVariadicsPass(ExpandVariadicsMode Mode);
32+
33+
// Chooses disable or optimize based on optimization level
34+
ExpandVariadicsPass(OptimizationLevel Level);
35+
36+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
37+
};
38+
39+
ModulePass *createExpandVariadicsPass(ExpandVariadicsMode);
40+
41+
} // end namespace llvm
42+
43+
#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@
137137
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
138138
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
139139
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
140+
#include "llvm/Transforms/IPO/ExpandVariadics.h"
140141
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
141142
#include "llvm/Transforms/IPO/FunctionAttrs.h"
142143
#include "llvm/Transforms/IPO/FunctionImport.h"

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
5959
MODULE_PASS("dxil-upgrade", DXILUpgradePass())
6060
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
6161
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
62+
MODULE_PASS("expand-variadics", ExpandVariadicsPass(OptimizationLevel::O0))
6263
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
6364
MODULE_PASS("function-import", FunctionImportPass())
6465
MODULE_PASS("globalopt", GlobalOptPass())

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
2424
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
2525
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
2626
MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
27+
#if 0
28+
Matt thought this was dead. It might be better removed; it is not clear that overriding with Lowering is a good thing.
29+
#endif
30+
MODULE_PASS("expand-variadics", ExpandVariadicsPass(ExpandVariadicsMode::Lowering))
2731
#undef MODULE_PASS
2832

2933
#ifndef FUNCTION_PASS

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
5656
#include "llvm/Transforms/IPO.h"
5757
#include "llvm/Transforms/IPO/AlwaysInliner.h"
58+
#include "llvm/Transforms/IPO/ExpandVariadics.h"
5859
#include "llvm/Transforms/IPO/GlobalDCE.h"
5960
#include "llvm/Transforms/IPO/Internalize.h"
6061
#include "llvm/Transforms/Scalar.h"
@@ -983,6 +984,8 @@ void AMDGPUPassConfig::addIRPasses() {
983984
if (isPassEnabled(EnableImageIntrinsicOptimizer))
984985
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
985986

987+
addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
988+
986989
// Function calls are not supported, so make sure we inline everything.
987990
addPass(createAMDGPUAlwaysInlinePass());
988991
addPass(createAlwaysInlinerLegacyPass());

llvm/lib/Transforms/IPO/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
1212
DeadArgumentElimination.cpp
1313
ElimAvailExtern.cpp
1414
EmbedBitcodePass.cpp
15+
ExpandVariadics.cpp
1516
ExtractGV.cpp
1617
ForceFunctionAttrs.cpp
1718
FunctionAttrs.cpp

0 commit comments

Comments
 (0)