Skip to content

[WIP][AMDGPU] Enable hostcall printf for OpenCL #70932

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

#if defined(BUILTIN) && !defined(LANGBUILTIN)
#define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
#endif
//===----------------------------------------------------------------------===//
// SI+ only builtins.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -402,5 +406,9 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32, "iffiIb", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f32, "ifiiIi", "nc", "fp8-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")

// OpenCL
LANGBUILTIN(printf, "icC*4.", "fp:0:", ALL_OCL_LANGUAGES)

#undef BUILTIN
#undef TARGET_BUILTIN
#undef LANGBUILTIN
10 changes: 7 additions & 3 deletions clang/include/clang/Basic/TargetOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,20 @@ class TargetOptions {

/// \brief Enumeration values for AMDGPU printf lowering scheme
enum class AMDGPUPrintfKind {
/// Use default lowering scheme, HIP programs use hostcall and OpenCL uses
/// buffered by default.
None = 0,

/// printf lowering scheme involving hostcalls, currently used by HIP
/// programs by default
Hostcall = 0,
Hostcall = 1,

/// printf lowering scheme involving implicit printf buffers,
Buffered = 1,
Buffered = 2,
};

/// \brief AMDGPU Printf lowering scheme
AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::Hostcall;
AMDGPUPrintfKind AMDGPUPrintfKindVal = AMDGPUPrintfKind::None;

// The code model to be used as specified by the user. Corresponds to
// CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus
Expand Down
8 changes: 5 additions & 3 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1251,15 +1251,17 @@ def emit_static_lib : Flag<["--"], "emit-static-lib">,

def mprintf_kind_EQ : Joined<["-"], "mprintf-kind=">, Group<m_Group>,
HelpText<"Specify the printf lowering scheme (AMDGPU only), allowed values are "
"\"none\" (Use default lowering scheme for a language, HIP uses hostcalls and "
"OpenCL uses Buffered scheme), "
"\"hostcall\"(printing happens during kernel execution, this scheme "
"relies on hostcalls which require system to support pcie atomics) "
"and \"buffered\"(printing happens after all kernel threads exit, "
"this uses a printf buffer and does not rely on pcie atomic support)">,
Visibility<[ClangOption, CC1Option]>,
Values<"hostcall,buffered">,
Values<"none,hostcall,buffered">,
NormalizedValuesScope<"TargetOptions::AMDGPUPrintfKind">,
NormalizedValues<["Hostcall", "Buffered"]>,
MarshallingInfoEnum<TargetOpts<"AMDGPUPrintfKindVal">, "Hostcall">;
NormalizedValues<["None", "Hostcall", "Buffered"]>,
MarshallingInfoEnum<TargetOpts<"AMDGPUPrintfKindVal">, "None">;

// HIP options
let Group = hip_Group in {
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/AST/Decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetCXXABI.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Visibility.h"
Expand Down Expand Up @@ -3585,6 +3586,12 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const {
if (!ConsiderWrapperFunctions && getStorageClass() == SC_Static)
return 0;

// AMDGCN implementation supports printf as a special case even
// for OpenCL
if (Context.getTargetInfo().getTriple().isAMDGCN() &&
Context.getLangOpts().OpenCL && BuiltinID == AMDGPU::BIprintf)
return BuiltinID;

// OpenCL v1.2 s6.9.f - The library functions defined in
// the C99 standard headers are not available.
if (Context.getLangOpts().OpenCL &&
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ static constexpr Builtin::Info BuiltinInfo[] = {
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG},
#include "clang/Basic/BuiltinsAMDGPU.def"
};

Expand Down
8 changes: 7 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2370,6 +2370,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
&getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
BuiltinID = mutateLongDoubleBuiltin(BuiltinID);

// Mutate the printf builtin ID since we use the same CodeGen path for
// HIP and OpenCL
if (getTarget().getTriple().isAMDGCN() && BuiltinID == AMDGPU::BIprintf)
BuiltinID = Builtin::BIprintf;

// If the builtin has been declared explicitly with an assembler label,
// disable the specialized emitting below. Ideally we should communicate the
// rename in IR, or at least avoid generating the intrinsic calls that are
Expand Down Expand Up @@ -5529,7 +5534,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return EmitOpenMPDevicePrintfCallExpr(E);
if (getTarget().getTriple().isNVPTX())
return EmitNVPTXDevicePrintfCallExpr(E);
if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
if (getTarget().getTriple().isAMDGCN() &&
(getLangOpts().HIP || getLangOpts().OpenCL))
return EmitAMDGPUDevicePrintfCallExpr(E);
}

Expand Down
27 changes: 21 additions & 6 deletions clang/lib/CodeGen/CGGPUBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "CodeGenFunction.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/MathExtras.h"
Expand Down Expand Up @@ -176,17 +177,29 @@ RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
E, this, GetVprintfDeclaration(CGM.getModule()), false);
}

// Returns true when the given type is a pointer or a constant array whose
// pointee/element type is a character type, i.e. the argument is a string.
static bool isString(const clang::Type *argXTy) {
  const bool IsPointerOrConstArray =
      argXTy->isPointerType() || argXTy->isConstantArrayType();
  // The element type is only inspected once the outer shape has matched,
  // mirroring the short-circuit evaluation order of a single && expression.
  return IsPointerOrConstArray &&
         argXTy->getPointeeOrArrayElementType()->isCharType();
}

RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
assert(E->getBuiltinCallee() == Builtin::BIprintf ||
E->getBuiltinCallee() == Builtin::BI__builtin_printf);
E->getBuiltinCallee() == Builtin::BI__builtin_printf ||
E->getBuiltinCallee() == AMDGPU::BIprintf);
assert(E->getNumArgs() >= 1); // printf always has at least one arg.

CallArgList CallArgs;
EmitCallArgs(CallArgs,
E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
E->arguments(), E->getDirectCallee(),
/* ParamsToSkip = */ 0);
llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());

SmallVector<llvm::Value *, 8> Args;
for (const auto &A : CallArgs) {
Expand All @@ -197,14 +210,16 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
}

llvm::Value *Arg = A.getRValue(*this).getScalarVal();
if (isString(A.getType().getTypePtr()) && CGM.getLangOpts().OpenCL)
Arg = Builder.CreateAddrSpaceCast(Arg, CGM.Int8PtrTy);
Args.push_back(Arg);
}

llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint());
IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation());

bool isBuffered = (CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal ==
clang::TargetOptions::AMDGPUPrintfKind::Buffered);
auto PFK = CGM.getTarget().getTargetOpts().AMDGPUPrintfKindVal;
bool isBuffered =
((PFK == clang::TargetOptions::AMDGPUPrintfKind::Buffered) ||
(CGM.getLangOpts().OpenCL &&
(PFK == clang::TargetOptions::AMDGPUPrintfKind::None)));
auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args, isBuffered);
Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
return RValue::get(Printf);
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -855,8 +855,8 @@ void CodeGenModule::Release() {
// Currently, "-mprintf-kind" option is only supported for HIP
if (LangOpts.HIP) {
auto *MDStr = llvm::MDString::get(
getLLVMContext(), (getTarget().getTargetOpts().AMDGPUPrintfKindVal ==
TargetOptions::AMDGPUPrintfKind::Hostcall)
getLLVMContext(), (getTarget().getTargetOpts().AMDGPUPrintfKindVal !=
TargetOptions::AMDGPUPrintfKind::Buffered)
? "hostcall"
: "buffered");
getModule().addModuleFlag(llvm::Module::Error, "amdgpu_printf_kind",
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4742,6 +4742,16 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.ClaimAllArgs(options::OPT_gen_cdb_fragment_path);
}

if (TC.getTriple().isAMDGPU() && types::isOpenCL(Input.getType())) {
if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) {
CmdArgs.push_back(Args.MakeArgString(
"-mprintf-kind=" +
Args.getLastArgValue(options::OPT_mprintf_kind_EQ)));
// Force compiler error on invalid conversion specifiers
CmdArgs.push_back(Args.MakeArgString("-Werror=format-invalid-specifier"));
}
}

if (IsCuda || IsHIP) {
// We have to pass the triple of the host if compiling for a CUDA/HIP device
// and vice-versa.
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CodeGenHIP/printf-kind-module-flag.hip
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mprintf-kind=buffered -o - %s | FileCheck -check-prefix=BUFFERED %s

// RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mprintf-kind=none -o - %s 2>&1| FileCheck %s -check-prefix=INV
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mprintf-kind=none -o - %s 2>&1| FileCheck %s -check-prefix=NONE

// HOSTCALL: !{{.*}} = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// BUFFERED: !{{.*}} = !{i32 1, !"amdgpu_printf_kind", !"buffered"}
// INV: error: invalid value 'none' in '-mprintf-kind=none'
// NONE: !{{.*}} = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
Loading