Skip to content

Commit 0f0e31c

Browse files
author
serge-sans-paille
committed
Update inline builtin handling to honor gnu inline attribute
Per the GCC info page: If the function is declared 'extern', then this definition of the function is used only for inlining. In no case is the function compiled as a standalone function, not even if you take its address explicitly. Such an address becomes an external reference, as if you had only declared the function, and had not defined it. Respect that behavior for inline builtins: keep the original definition, and generate a copy of the declaration suffixed by '.inline' that's only referenced in direct call. This fixes holes in c3717b6. Differential Revision: https://reviews.llvm.org/D111009
1 parent a4bccf7 commit 0f0e31c

File tree

9 files changed

+142
-20
lines changed

9 files changed

+142
-20
lines changed

clang/lib/AST/Decl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3177,7 +3177,8 @@ bool FunctionDecl::isInlineBuiltinDeclaration() const {
31773177

31783178
const FunctionDecl *Definition;
31793179
return hasBody(Definition) && Definition->isInlineSpecified() &&
3180-
Definition->hasAttr<AlwaysInlineAttr>();
3180+
Definition->hasAttr<AlwaysInlineAttr>() &&
3181+
Definition->hasAttr<GNUInlineAttr>();
31813182
}
31823183

31833184
bool FunctionDecl::isDestroyingOperatorDelete() const {

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4891,12 +4891,28 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
48914891
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
48924892

48934893
if (auto builtinID = FD->getBuiltinID()) {
4894-
// Replaceable builtin provide their own implementation of a builtin. Unless
4895-
// we are in the builtin implementation itself, don't call the actual
4896-
// builtin. If we are in the builtin implementation, avoid trivial infinite
4894+
std::string FDInlineName = (FD->getName() + ".inline").str();
4895+
// When directing calling an inline builtin, call it through it's mangled
4896+
// name to make it clear it's not the actual builtin.
4897+
if (FD->isInlineBuiltinDeclaration() &&
4898+
CGF.CurFn->getName() != FDInlineName) {
4899+
llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
4900+
llvm::Function *Fn = llvm::cast<llvm::Function>(CalleePtr);
4901+
llvm::Module *M = Fn->getParent();
4902+
llvm::Function *Clone = M->getFunction(FDInlineName);
4903+
if (!Clone) {
4904+
Clone = llvm::Function::Create(Fn->getFunctionType(),
4905+
llvm::GlobalValue::InternalLinkage,
4906+
Fn->getAddressSpace(), FDInlineName, M);
4907+
Clone->addFnAttr(llvm::Attribute::AlwaysInline);
4908+
}
4909+
return CGCallee::forDirect(Clone, GD);
4910+
}
4911+
4912+
// Replaceable builtins provide their own implementation of a builtin. If we
4913+
// are in an inline builtin implementation, avoid trivial infinite
48974914
// recursion.
4898-
if (!FD->isInlineBuiltinDeclaration() ||
4899-
CGF.CurFn->getName() == FD->getName())
4915+
else
49004916
return CGCallee::forBuiltin(builtinID, FD);
49014917
}
49024918

@@ -4905,6 +4921,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
49054921
FD->hasAttr<CUDAGlobalAttr>())
49064922
CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub(
49074923
cast<llvm::GlobalValue>(CalleePtr->stripPointerCasts()));
4924+
49084925
return CGCallee::forDirect(CalleePtr, GD);
49094926
}
49104927

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "llvm/Support/CRC.h"
4646
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
4747
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
48+
4849
using namespace clang;
4950
using namespace CodeGen;
5051

@@ -1294,10 +1295,22 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
12941295
FunctionArgList Args;
12951296
QualType ResTy = BuildFunctionArgList(GD, Args);
12961297

1297-
// Give a different name to inline builtin to avoid conflict with actual
1298-
// builtins.
1299-
if (FD->isInlineBuiltinDeclaration() && Fn)
1300-
Fn->setName(Fn->getName() + ".inline");
1298+
// When generating code for a builtin with an inline declaration, use a
1299+
// mangled name to hold the actual body, while keeping an external definition
1300+
// in case the function pointer is referenced somewhere.
1301+
if (FD->isInlineBuiltinDeclaration() && Fn) {
1302+
std::string FDInlineName = (Fn->getName() + ".inline").str();
1303+
llvm::Module *M = Fn->getParent();
1304+
llvm::Function *Clone = M->getFunction(FDInlineName);
1305+
if (!Clone) {
1306+
Clone = llvm::Function::Create(Fn->getFunctionType(),
1307+
llvm::GlobalValue::InternalLinkage,
1308+
Fn->getAddressSpace(), FDInlineName, M);
1309+
Clone->addFnAttr(llvm::Attribute::AlwaysInline);
1310+
}
1311+
Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
1312+
Fn = Clone;
1313+
}
13011314

13021315
// Check if we should generate debug info for this function.
13031316
if (FD->hasAttr<NoDebugAttr>()) {
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
3+
// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -o - %s | opt -S -verify | FileCheck %s
4+
//
5+
// Verifies that clang detects memcmp inline version and uses it instead of the builtin.
6+
7+
typedef unsigned long size_t;
8+
9+
// Clang requires these attributes for a function to be redefined.
10+
#define AVAILABLE_EXTERNALLY extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
11+
12+
const void *con_unify_unimap_p1;
13+
14+
AVAILABLE_EXTERNALLY
15+
int memcmp(const void *p, const void *q, unsigned long size) {
16+
return __builtin_memcmp(p, q, size);
17+
}
18+
19+
// CHECK-LABEL: @con_unify_unimap_q1(
20+
// CHECK-NEXT: entry:
21+
// CHECK-NEXT: [[P_ADDR_I:%.*]] = alloca i8*, align 8
22+
// CHECK-NEXT: [[Q_ADDR_I:%.*]] = alloca i8*, align 8
23+
// CHECK-NEXT: [[SIZE_ADDR_I:%.*]] = alloca i64, align 8
24+
// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** @con_unify_unimap_p1, align 8
25+
// CHECK-NEXT: store i8* [[TMP0]], i8** [[P_ADDR_I]], align 8
26+
// CHECK-NEXT: store i8* bitcast (i32 ()* @con_unify_unimap_q1 to i8*), i8** [[Q_ADDR_I]], align 8
27+
// CHECK-NEXT: store i64 4, i64* [[SIZE_ADDR_I]], align 8
28+
// CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** [[P_ADDR_I]], align 8
29+
// CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[Q_ADDR_I]], align 8
30+
// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[SIZE_ADDR_I]], align 8
31+
// CHECK-NEXT: [[CALL_I:%.*]] = call i32 @memcmp(i8* [[TMP1]], i8* [[TMP2]], i64 [[TMP3]]) #[[ATTR3:[0-9]+]]
32+
// CHECK-NEXT: ret i32 [[CALL_I]]
33+
//
34+
int con_unify_unimap_q1(void) {
35+
return memcmp(con_unify_unimap_p1, con_unify_unimap_q1, sizeof(int));
36+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
2+
//
3+
// Verifies that clang-generated *.inline are flagged as internal.
4+
5+
typedef unsigned long size_t;
6+
7+
#define AVAILABLE_EXTERNALLY extern inline __attribute__((always_inline)) __attribute__((gnu_inline))
8+
// Clang recognizes an inline builtin and renames it to memcmp.inline to prevent conflict with builtins.
9+
AVAILABLE_EXTERNALLY int memcmp(const void *a, const void *b, size_t c) {
10+
return __builtin_memcmp(a, b, c);
11+
}
12+
13+
// CHECK: internal{{.*}}memcmp.inline
14+
int bar(const void *a, const void *b, size_t c) {
15+
return memcmp(a, b, c);
16+
}
17+
18+
// Note that extern has been omitted here.
19+
#define TRIPLE_INLINE inline __attribute__((always_inline)) __attribute__((gnu_inline))
20+
21+
// Clang recognizes an inline builtin and renames it to memcpy.inline to prevent conflict with builtins.
22+
TRIPLE_INLINE void *memcpy(void *a, const void *b, size_t c) {
23+
return __builtin_memcpy(a, b, c);
24+
}
25+
26+
// CHECK: internal{{.*}}memcpy.inline
27+
void *foo(void *a, const void *b, size_t c) {
28+
return memcpy(a, b, c);
29+
}

clang/test/CodeGen/memcpy-inline-builtin.c

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,39 @@ AVAILABLE_EXTERNALLY void *memcpy(void *a, const void *b, size_t c) {
3232
// CHECK-NEXT: store i8* [[TMP0]], i8** [[A_ADDR_I]], align 8
3333
// CHECK-NEXT: store i8* [[TMP1]], i8** [[B_ADDR_I]], align 8
3434
// CHECK-NEXT: store i64 [[TMP2]], i64* [[C_ADDR_I]], align 8
35-
// CHECK-NEXT: call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc !2
35+
// CHECK-NEXT: call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4:[0-9]+]], !srcloc !2
3636
// CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** [[A_ADDR_I]], align 8
3737
// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[B_ADDR_I]], align 8
3838
// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[C_ADDR_I]], align 8
39-
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR2]]
39+
// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR4]]
40+
// CHECK-NEXT: ret i8* [[TMP3]]
41+
//
42+
void *foo(void *a, const void *b, size_t c) {
43+
return memcpy(a, b, c);
44+
}
45+
46+
// CHECK-LABEL: @bar(
47+
// CHECK-NEXT: entry:
48+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8
49+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i8*, align 8
50+
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
51+
// CHECK-NEXT: [[CPY:%.*]] = alloca i8* (i8*, i8*, i64)*, align 8
52+
// CHECK-NEXT: store i8* [[A:%.*]], i8** [[A_ADDR]], align 8
53+
// CHECK-NEXT: store i8* [[B:%.*]], i8** [[B_ADDR]], align 8
54+
// CHECK-NEXT: store i64 [[C:%.*]], i64* [[C_ADDR]], align 8
55+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[C_ADDR]], align 8
56+
// CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 10
57+
// CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[CMP]] to i64
58+
// CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)* @foo
59+
// CHECK-NEXT: store i8* (i8*, i8*, i64)* [[COND]], i8* (i8*, i8*, i64)** [[CPY]], align 8
60+
// CHECK-NEXT: [[TMP2:%.*]] = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** [[CPY]], align 8
61+
// CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** [[A_ADDR]], align 8
62+
// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[B_ADDR]], align 8
63+
// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[C_ADDR]], align 8
64+
// CHECK-NEXT: [[CALL:%.*]] = call i8* [[TMP2]](i8* [[TMP3]], i8* [[TMP4]], i64 [[TMP5]])
4065
// CHECK-NEXT: ret void
4166
//
42-
void foo(void *a, const void *b, size_t c) {
43-
memcpy(a, b, c);
67+
void bar(void *a, const void *b, size_t c) {
68+
void *(*cpy)(void *, const void *, size_t) = c > 10 ? memcpy : foo;
69+
cpy(a, b, c);
4470
}

clang/test/CodeGen/memcpy-nobuiltin.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -DWITH_DECL | FileCheck --check-prefix=CHECK-WITH-DECL %s
22
// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -UWITH_DECL | FileCheck --check-prefix=CHECK-NO-DECL %s
3-
// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -DWITH_SELF_REFERENCE_DECL | FileCheck --check-prefix=CHECK-SELF-REF-DECL %s
3+
// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -disable-llvm-passes -DWITH_SELF_REFERENCE_DECL | FileCheck --check-prefix=CHECK-SELF-REF-DECL %s
44
//
55
// CHECK-WITH-DECL-NOT: @llvm.memcpy
66
// CHECK-NO-DECL: @llvm.memcpy
77
// CHECK-SELF-REF-DECL-LABEL: define {{.*}}i8* @memcpy.inline
8-
// CHECK-SELF-REF-DECL: @memcpy(
8+
// CHECK-SELF-REF-DECL: @llvm.memcpy.{{.*}}(
99
//
1010
#include <memcpy-nobuiltin.inc>
1111
void test(void *dest, void const *from, size_t n) {

clang/test/CodeGen/memcpy-nobuiltin.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
extern void *memcpy(void *dest, void const *from, size_t n);
33

44
#ifdef WITH_DECL
5-
inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) {
5+
inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) {
66
char const *ifrom = from;
77
char *idest = dest;
88
while (n--)
@@ -11,7 +11,7 @@ inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from,
1111
}
1212
#endif
1313
#ifdef WITH_SELF_REFERENCE_DECL
14-
inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) {
14+
inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) {
1515
if (n != 0)
1616
memcpy(dest, from, n);
1717
return dest;

clang/test/CodeGen/pr9614.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,14 @@ void f(void) {
3232

3333
// CHECK-LABEL: define{{.*}} void @f()
3434
// CHECK: call void @foo()
35-
// CHECK: call i32 @abs(i32 %0)
35+
// CHECK: call i32 @abs(i32 0)
3636
// CHECK: call i8* @strrchr(
3737
// CHECK: call void @llvm.prefetch.p0i8(
3838
// CHECK: call i8* @memchr(
3939
// CHECK: ret void
4040

4141
// CHECK: declare void @foo()
42+
// CHECK: declare i32 @abs(i32
4243
// CHECK: declare i8* @strrchr(i8*, i32)
4344
// CHECK: declare i8* @memchr(
44-
// CHECK: declare i32 @abs(i32
4545
// CHECK: declare void @llvm.prefetch.p0i8(

0 commit comments

Comments
 (0)