Skip to content

[OpenMP] Remove dependency on libffi from offloading runtime #91264

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5927,12 +5927,16 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(

CodeGenFunction CGF(CGM, true);
llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
[&CGF, &D, &CodeGen](StringRef EntryFnName) {
[&CGF, &D, &CodeGen, this](StringRef EntryFnName) {
const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU())
return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(
CS, D.getBeginLoc());
else
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
else

return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
};

OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
Expand Down
126 changes: 126 additions & 0 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,102 @@ static llvm::Function *emitOutlinedFunctionPrologue(
return F;
}

static llvm::Function *emitOutlinedFunctionPrologueAggregate(
CodeGenFunction &CGF, FunctionArgList &Args,
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
&LocalAddrs,
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
&VLASizes,
llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc,
StringRef FunctionName) {
const CapturedDecl *CD = CS.getCapturedDecl();
const RecordDecl *RD = CS.getCapturedRecordDecl();

CXXThisValue = nullptr;
// Build the argument list.
CodeGenModule &CGM = CGF.CGM;
ASTContext &Ctx = CGM.getContext();
Args.append(CD->param_begin(), CD->param_end());

// Create the function declaration.
const CGFunctionInfo &FuncInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

auto *F =
llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
FunctionName, &CGM.getModule());
CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
if (CD->isNothrow())
F->setDoesNotThrow();
F->setDoesNotRecurse();

// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr);
LValue ContextLV = CGF.MakeNaturalAlignAddrLValue(
ContextV, CGM.getContext().getTagDeclType(RD));
auto I = CS.captures().begin();
for (const FieldDecl *FD : RD->fields()) {
LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD);
// Do not map arguments if we emit function with non-original types.
Address LocalAddr = FieldLV.getAddress(CGF);
// If we are capturing a pointer by copy we don't need to do anything, just
// use the value that we get from the arguments.
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
const VarDecl *CurVD = I->getCapturedVar();
LocalAddrs.insert({FD, {CurVD, LocalAddr}});
++I;
continue;
}

LValue ArgLVal =
CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl);
if (FD->hasCapturedVLAType()) {
llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
const VariableArrayType *VAT = FD->getCapturedVLAType();
VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg);
} else if (I->capturesVariable()) {
const VarDecl *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
Address ArgAddr = ArgLVal.getAddress(CGF);
if (ArgLVal.getType()->isLValueReferenceType()) {
ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
} else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
assert(ArgLVal.getType()->isPointerType());
ArgAddr = CGF.EmitLoadOfPointer(
ArgAddr, ArgLVal.getType()->castAs<PointerType>());
}
LocalAddrs.insert(
{FD,
{Var, Address(ArgAddr.getBasePointer(), ArgAddr.getElementType(),
Ctx.getDeclAlign(Var))}});
} else if (I->capturesVariableByCopy()) {
assert(!FD->getType()->isAnyPointerType() &&
"Not expecting a captured pointer.");
const VarDecl *Var = I->getCapturedVar();
Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD),
Var->getName());
LValue CopyLVal =
CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl);

RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation());
CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal);

LocalAddrs.insert({FD, {Var, CopyAddr}});
} else {
// If 'this' is captured, load it into CXXThisValue.
assert(I->capturesThis());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a message

CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}});
}
++I;
}

return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
SourceLocation Loc) {
Expand Down Expand Up @@ -711,6 +807,36 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
return WrapperF;
}

llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate(
const CapturedStmt &S, SourceLocation Loc) {
assert(
CapturedStmtInfo &&
"CapturedStmtInfo should be set when generating the captured function");
const CapturedDecl *CD = S.getCapturedDecl();
// Build the argument list.
FunctionArgList Args;
llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
StringRef FunctionName = CapturedStmtInfo->getHelperName();
llvm::Function *F = emitOutlinedFunctionPrologueAggregate(
*this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName);
CodeGenFunction::OMPPrivateScope LocalScope(*this);
for (const auto &LocalAddrPair : LocalAddrs) {
if (LocalAddrPair.second.first) {
LocalScope.addPrivate(LocalAddrPair.second.first,
LocalAddrPair.second.second);
}
}
(void)LocalScope.Privatize();
for (const auto &VLASizePair : VLASizes)
VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
PGO.assignRegionCounters(GlobalDecl(CD), F);
CapturedStmtInfo->EmitBody(*this, CD->getBody());
(void)LocalScope.ForceCleanup();
FinishFunction(CD->getBodyRBrace());
return F;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -3643,6 +3643,9 @@ class CodeGenFunction : public CodeGenTypeCache {
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
SourceLocation Loc);
llvm::Function *
GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S,
SourceLocation Loc);
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
SmallVectorImpl<llvm::Value *> &CapturedVars);
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
Expand Down
6 changes: 3 additions & 3 deletions clang/test/OpenMP/declare_target_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ int bar() { return 1 + foo() + bar() + baz1() + baz2(); }
int maini1() {
int a;
static long aa = 32 + bbb + ccc + fff + ggg;
// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.*}}, i64 {{.*}}, i64 {{.*}})
// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
#pragma omp target map(tofrom \
: a, b)
{
Expand All @@ -163,7 +163,7 @@ int maini1() {

int baz3() { return 2 + baz2(); }
int baz2() {
// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](ptr {{.*}}, i64 {{.*}})
// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
#pragma omp target parallel
++c;
return 2 + baz3();
Expand All @@ -175,7 +175,7 @@ static __typeof(create) __t_create __attribute__((__weakref__("__create")));

int baz5() {
bool a;
// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](ptr {{.*}}, i64 {{.*}})
// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
#pragma omp target
a = __extension__(void *) & __t_create != 0;
return a;
Expand Down
2 changes: 1 addition & 1 deletion clang/test/OpenMP/declare_target_link_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ int maini1() {
return 0;
}

// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}}
// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr {{[^,]*}}
// DEVICE: [[C_REF:%.+]] = load ptr, ptr @c_decl_tgt_ref_ptr,
// DEVICE: [[C:%.+]] = load i32, ptr [[C_REF]],
// DEVICE: store i32 [[C]], ptr %
Expand Down
Loading
Loading