Skip to content

Commit 4c04670

Browse files
committed
[WIP][AMDGPU] hostcall printf support for OpenCL
1 parent 5a9b996 commit 4c04670

File tree

12 files changed

+359
-71
lines changed

12 files changed

+359
-71
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
2222
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
2323
#endif
24+
25+
#if defined(BUILTIN) && !defined(LANGBUILTIN)
26+
#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
27+
#endif
2428
//===----------------------------------------------------------------------===//
2529
// SI+ only builtins.
2630
//===----------------------------------------------------------------------===//
@@ -402,5 +406,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts")
402406
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
403407
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
404408

409+
// OpenCL
410+
LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)
411+
405412
#undef BUILTIN
406413
#undef TARGET_BUILTIN
414+
#undef LANGBUILTIN

clang/include/clang/Basic/TargetOptions.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,20 @@ class TargetOptions {
9292

9393
/// \brief Enumeration values for AMDGPU printf lowering scheme
9494
enum class AMDGPUPrintfKind {
95+
/// Use default lowering scheme, HIP programs use hostcall and OpenCL uses
96+
/// buffered by default.
97+
None = 0,
98+
9599
/// printf lowering scheme involving hostcalls, currently used by HIP
96100
/// programs by default
97-
Hostcall = 0,
101+
Hostcall = 1,
98102

99103
/// printf lowering scheme involving implicit printf buffers,
100-
Buffered = 1,
104+
Buffered = 2,
101105
};
102106

103107
/// \brief AMDGPU Printf lowering scheme
104-
AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::Hostcall;
108+
AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::None;
105109

106110
// The code model to be used as specified by the user. Corresponds to
107111
// CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus

clang/include/clang/Driver/Options.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,15 +1251,17 @@ def emit_static_lib : Flag<["--"], "emit-static-lib">,
12511251

12521252
def mprintf_kind_EQ : Joined<["-"], "mprintf-kind=">, Group<m_Group>,
12531253
HelpText<"Specify the printf lowering scheme (AMDGPU only), allowed values are "
1254+
"\"none\" (Use default lowering scheme for a language, HIP uses hostcalls and "
1255+
"OpenCL uses Buffered scheme), "
12541256
"\"hostcall\"(printing happens during kernel execution, this scheme "
12551257
"relies on hostcalls which require system to support pcie atomics) "
12561258
"and \"buffered\"(printing happens after all kernel threads exit, "
12571259
"this uses a printf buffer and does not rely on pcie atomic support)">,
12581260
Visibility<[ClangOption, CC1Option]>,
1259-
Values<"hostcall,buffered">,
1261+
Values<"none,hostcall,buffered">,
12601262
NormalizedValuesScope<"TargetOptions::AMDGPUPrintfKind">,
1261-
NormalizedValues<["Hostcall", "Buffered"]>,
1262-
MarshallingInfoEnum<TargetOpts<"AMDGPUPrintfKindVal">, "Hostcall">;
1263+
NormalizedValues<["None", "Hostcall", "Buffered"]>,
1264+
MarshallingInfoEnum<TargetOpts<"AMDGPUPrintfKindVal">, "None">;
12631265

12641266
// HIP options
12651267
let Group = hip_Group in {

clang/lib/AST/Decl.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "clang/Basic/SourceLocation.h"
5050
#include "clang/Basic/SourceManager.h"
5151
#include "clang/Basic/Specifiers.h"
52+
#include "clang/Basic/TargetBuiltins.h"
5253
#include "clang/Basic/TargetCXXABI.h"
5354
#include "clang/Basic/TargetInfo.h"
5455
#include "clang/Basic/Visibility.h"
@@ -3585,6 +3586,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {
35853586
if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
35863587
return 0;
35873588

3589+
// AMDGCN implementation supports printf as a special case even
3590+
// for OpenCL
3591+
if (Context.getTargetInfo().getTriple().isAMDGCN() &&
3592+
Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
3593+
return BuiltinID;
3594+
35883595
// OpenCL v1.2 s6.9.f - The library functions defined in
35893596
// the C99 standard headers are not available.
35903597
if (Context.getLangOpts().OpenCL &&

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
9191
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
9292
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
9393
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94+
#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \
95+
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
9496
#include "clang/Basic/BuiltinsAMDGPU.def"
9597
};
9698

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2370,6 +2370,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
23702370
&getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
23712371
BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
23722372

2373+
// Mutate the printf builtin ID since we use the same CodeGen path for
2374+
// HIP and OpenCL
2375+
if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
2376+
BuiltinID = Builtin::BIprintf;
2377+
23732378
// If the builtin has been declared explicitly with an assembler label,
23742379
// disable the specialized emitting below. Ideally we should communicate the
23752380
// rename in IR, or at least avoid generating the intrinsic calls that are
@@ -5529,7 +5534,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
55295534
return EmitOpenMPDevicePrintfCallExpr(E);
55305535
if (getTarget().getTriple().isNVPTX())
55315536
return EmitNVPTXDevicePrintfCallExpr(E);
5532-
if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5537+
if (getTarget().getTriple().isAMDGCN() &&
5538+
(getLangOpts().HIP || getLangOpts().OpenCL))
55335539
return EmitAMDGPUDevicePrintfCallExpr(E);
55345540
}
55355541

clang/lib/CodeGen/CGGPUBuiltin.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "CodeGenFunction.h"
1515
#include "clang/Basic/Builtins.h"
16+
#include "clang/Basic/TargetBuiltins.h"
1617
#include "llvm/IR/DataLayout.h"
1718
#include "llvm/IR/Instruction.h"
1819
#include "llvm/Support/MathExtras.h"
@@ -176,17 +177,29 @@ RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
176177
E, this, GetVprintfDeclaration(CGM.getModule()), false);
177178
}
178179

180+
// Determines if an argument is a string
181+
static bool isString(const clang::Type *argXTy) {
182+
if ((argXTy->isPointerType() || argXTy->isConstantArrayType()) &&
183+
argXTy->getPointeeOrArrayElementType()->isCharType())
184+
return true;
185+
else
186+
return false;
187+
}
188+
179189
RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
180190
assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
181191
assert(E->getBuiltinCallee() == Builtin::BIprintf ||
182-
E->getBuiltinCallee() == Builtin::BI__builtin_printf);
192+
E->getBuiltinCallee() == Builtin::BI__builtin_printf ||
193+
E->getBuiltinCallee() == AMDGPU::BIprintf);
183194
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
184195

185196
CallArgList CallArgs;
186197
EmitCallArgs(CallArgs,
187198
E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
188199
E->arguments(), E->getDirectCallee(),
189200
/* ParamsToSkip = */ 0);
201+
llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
202+
IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
190203

191204
SmallVector<llvm::Value *, 8> Args;
192205
for (const auto &A : CallArgs) {
@@ -197,14 +210,16 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
197210
}
198211

199212
llvm::Value *Arg = A.getRValue(*this).getScalarVal();
213+
if (isString(A.getType().getTypePtr()) && CGM.getLangOpts().OpenCL)
214+
Arg = Builder.CreateAddrSpaceCast(Arg, CGM.Int8PtrTy);
200215
Args.push_back(Arg);
201216
}
202217

203-
llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
204-
IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());
205-
206-
bool isBuffered = (CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal ==
207-
clang::TargetOptions::AMDGPUPrintfKind::Buffered);
218+
auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal;
219+
bool isBuffered =
220+
((PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered) ||
221+
(CGM.getLangOpts().OpenCL &&
222+
(PFK == clang::TargetOptions::AMDGPUPrintfKind::None)));
208223
auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered);
209224
Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
210225
return RValue::get(Printf);

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -855,8 +855,8 @@ void CodeGenModule::Release() {
855855
// Currently, "-mprintf-kind" option is only supported for HIP
856856
if (LangOpts.HIP) {
857857
auto *MDStr = llvm::MDString::get(
858-
getLLVMContext(), (getTarget().getTargetOpts().AMDGPUPrintfKindVal ==
859-
TargetOptions::AMDGPUPrintfKind::Hostcall)
858+
getLLVMContext(), (getTarget().getTargetOpts().AMDGPUPrintfKindVal !=
859+
TargetOptions::AMDGPUPrintfKind::Buffered)
860860
? "hostcall"
861861
: "buffered");
862862
getModule().addModuleFlag(llvm::Module::Error, "amdgpu_printf_kind",

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
47424742
Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
47434743
}
47444744

4745+
if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) {
4746+
if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
4747+
CmdArgs.push_back(Args.MakeArgString(
4748+
"-mprintf-kind=" +
4749+
Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
4750+
// Force compiler error on invalid conversion specifiers
4751+
CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier"));
4752+
}
4753+
}
4754+
47454755
if (IsCuda || IsHIP) {
47464756
// We have to pass the triple of the host if compiling for a CUDA/HIP device
47474757
// and vice-versa.

clang/test/CodeGenHIP/printf-kind-module-flag.hip

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
1010
// RUN: -mprintf-kind=buffered -o - %s | FileCheck -check-prefix=BUFFERED %s
1111

12-
// RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
13-
// RUN: -mprintf-kind=none -o - %s 2>&1| FileCheck %s -check-prefix=INV
12+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
13+
// RUN: -mprintf-kind=none -o - %s 2>&1| FileCheck %s -check-prefix=NONE
1414

1515
// HOSTCALL: !{{.*}} = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
1616
// BUFFERED: !{{.*}} = !{i32 1, !"amdgpu_printf_kind", !"buffered"}
17-
// INV: error: invalid value 'none' in '-mprintf-kind=none'
17+
// NONE: !{{.*}} = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}

0 commit comments

Comments
 (0)