Skip to content

Commit b0c4093

Browse files
committed
merge main into amd-staging
Lands and internally reverts d36f66b "[NFC][offload][OMPT] Cleanup of OMPT internals" (llvm#109005); the downstream Gerrit review contains the content we want landed. Change-Id: I9aebe6a0aa1775d85f7225642bcb6bdc6c5de41f
2 parents 06a9bfa + cff753f commit b0c4093

File tree

148 files changed

+41816
-14537
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

148 files changed

+41816
-14537
lines changed

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -510,12 +510,11 @@ class RewriteInstance {
510510
};
511511

512512
/// Different types of X86-64 PLT sections.
513-
const PLTSectionInfo X86_64_PLTSections[4] = {
514-
{ ".plt", 16 },
515-
{ ".plt.got", 8 },
516-
{ ".plt.sec", 8 },
517-
{ nullptr, 0 }
518-
};
513+
const PLTSectionInfo X86_64_PLTSections[5] = {{".plt", 16},
514+
{".plt.got", 8},
515+
{".plt.sec", 8},
516+
{".iplt", 16},
517+
{nullptr, 0}};
519518

520519
/// AArch64 PLT sections.
521520
const PLTSectionInfo AArch64_PLTSections[4] = {

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1533,7 +1533,7 @@ void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
15331533

15341534
MCSymbol *Symbol = Rel->Symbol;
15351535
if (!Symbol) {
1536-
if (!BC->isAArch64() || !Rel->Addend || !Rel->isIRelative())
1536+
if (BC->isRISCV() || !Rel->Addend || !Rel->isIRelative())
15371537
return;
15381538

15391539
// IFUNC trampoline without symbol
Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
// This test checks that IFUNC trampoline is properly recognised by BOLT
2-
31
// With -O0 indirect call is performed on IPLT trampoline. IPLT trampoline
42
// has IFUNC symbol.
5-
// RUN: %clang %cflags -nostdlib -O0 -no-pie %s -fuse-ld=lld \
3+
// RUN: %clang %cflags -nostdlib -O0 -no-pie %p/../Inputs/ifunc.c -fuse-ld=lld \
64
// RUN: -o %t.O0.exe -Wl,-q
75
// RUN: llvm-bolt %t.O0.exe -o %t.O0.bolt.exe \
86
// RUN: --print-disasm --print-only=_start | \
@@ -12,7 +10,7 @@
1210

1311
// Non-pie static executable doesn't generate PT_DYNAMIC, check relocation
1412
// is read successfully and IPLT trampoline has been identified by bolt.
15-
// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -no-pie \
13+
// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \
1614
// RUN: -o %t.O3_nopie.exe -Wl,-q
1715
// RUN: llvm-readelf -l %t.O3_nopie.exe | \
1816
// RUN: FileCheck --check-prefix=NON_DYN_CHECK %s
@@ -25,7 +23,7 @@
2523
// With -O3 direct call is performed on IPLT trampoline. IPLT trampoline
2624
// doesn't have associated symbol. The ifunc symbol has the same address as
2725
// IFUNC resolver function.
28-
// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -fPIC -pie \
26+
// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \
2927
// RUN: -o %t.O3_pie.exe -Wl,-q
3028
// RUN: llvm-bolt %t.O3_pie.exe -o %t.O3_pie.bolt.exe \
3129
// RUN: --print-disasm --print-only=_start | \
@@ -35,8 +33,8 @@
3533

3634
// Check that IPLT trampolines located in .plt section are normally handled by
3735
// BOLT. The gnu-ld linker doesn't use separate .iplt section.
38-
// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -fPIC -pie \
39-
// RUN: -T %p/Inputs/iplt.ld -o %t.iplt_O3_pie.exe -Wl,-q
36+
// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \
37+
// RUN: -T %p/../Inputs/iplt.ld -o %t.iplt_O3_pie.exe -Wl,-q
4038
// RUN: llvm-bolt %t.iplt_O3_pie.exe -o %t.iplt_O3_pie.bolt.exe \
4139
// RUN: --print-disasm --print-only=_start | \
4240
// RUN: FileCheck --check-prefix=CHECK %s
@@ -49,14 +47,3 @@
4947

5048
// REL_CHECK: R_AARCH64_IRELATIVE [[#%x,REL_SYMB_ADDR:]]
5149
// REL_CHECK: [[#REL_SYMB_ADDR]] {{.*}} FUNC {{.*}} resolver_foo
52-
53-
static void foo() {}
54-
static void bar() {}
55-
56-
extern int use_foo;
57-
58-
static void *resolver_foo(void) { return use_foo ? foo : bar; }
59-
60-
__attribute__((ifunc("resolver_foo"))) void ifoo();
61-
62-
void _start() { ifoo(); }

bolt/test/Inputs/ifunc.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// This test checks that IFUNC trampoline is properly recognised by BOLT
2+
3+
static void foo() {}
4+
static void bar() {}
5+
6+
extern int use_foo;
7+
8+
static void *resolver_foo(void) { return use_foo ? foo : bar; }
9+
10+
__attribute__((ifunc("resolver_foo"))) void ifoo();
11+
12+
void _start() { ifoo(); }
File renamed without changes.

bolt/test/X86/ifunc.test

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Check if BOLT can process ifunc symbols from .plt section
2+
// RUN: %clang %cflags -nostdlib -no-pie %p/../Inputs/ifunc.c -fuse-ld=lld \
3+
// RUN: -o %t.exe -Wl,-q
4+
// RUN: llvm-bolt %t.exe -o %t.bolt.exe \
5+
// RUN: --print-disasm --print-only=_start | \
6+
// RUN: FileCheck --check-prefix=CHECK %s
7+
// RUN: llvm-readelf -aW %t.bolt.exe | \
8+
// RUN: FileCheck --check-prefix=REL_CHECK %s
9+
10+
// Check if BOLT can process ifunc symbols from .plt section in non-pie static
11+
// executable case.
12+
// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \
13+
// RUN: -o %t.nopie.exe -Wl,-q
14+
// RUN: llvm-readelf -l %t.nopie.exe | \
15+
// RUN: FileCheck --check-prefix=NON_DYN_CHECK %s
16+
// RUN: llvm-bolt %t.nopie.exe -o %t.nopie.bolt.exe \
17+
// RUN: --print-disasm --print-only=_start | \
18+
// RUN: FileCheck --check-prefix=CHECK %s
19+
// RUN: llvm-readelf -aW %t.nopie.bolt.exe | \
20+
// RUN: FileCheck --check-prefix=REL_CHECK %s
21+
22+
// Check if BOLT can process ifunc symbols from .plt section in pie executable
23+
// case.
24+
// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \
25+
// RUN: -o %t.pie.exe -Wl,-q
26+
// RUN: llvm-bolt %t.pie.exe -o %t.pie.bolt.exe \
27+
// RUN: --print-disasm --print-only=_start | \
28+
// RUN: FileCheck --check-prefix=CHECK %s
29+
// RUN: llvm-readelf -aW %t.pie.bolt.exe | \
30+
// RUN: FileCheck --check-prefix=REL_CHECK %s
31+
32+
// Check that IPLT trampolines located in .plt section are normally handled by
33+
// BOLT. The gnu-ld linker doesn't use separate .iplt section.
34+
// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \
35+
// RUN: -T %p/../Inputs/iplt.ld -o %t.iplt_pie.exe -Wl,-q
36+
// RUN: llvm-bolt %t.iplt_pie.exe -o %t.iplt_pie.bolt.exe \
37+
// RUN: --print-disasm --print-only=_start | \
38+
// RUN: FileCheck --check-prefix=CHECK %s
39+
// RUN: llvm-readelf -aW %t.iplt_pie.bolt.exe | \
40+
// RUN: FileCheck --check-prefix=REL_CHECK %s
41+
42+
// NON_DYN_CHECK-NOT: DYNAMIC
43+
44+
// CHECK: callq "resolver_foo/1@PLT"
45+
46+
// REL_CHECK: R_X86_64_IRELATIVE [[#%x,REL_SYMB_ADDR:]]
47+
// REL_CHECK: [[#REL_SYMB_ADDR]] {{.*}} FUNC {{.*}} resolver_foo

clang/docs/ReleaseNotes.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,11 @@ Attribute Changes in Clang
293293
- Introduced a new attribute ``[[clang::coro_await_elidable_argument]]`` on function parameters
294294
to propagate safe elide context to arguments if such function is also under a safe elide context.
295295

296+
- The documentation of the ``[[clang::musttail]]`` attribute was updated to
297+
note that the lifetimes of all local variables end before the call. This does
298+
not change the behaviour of the compiler, as this was true for previous
299+
versions.
300+
296301
Improvements to Clang's diagnostics
297302
-----------------------------------
298303

@@ -349,6 +354,10 @@ Improvements to Clang's diagnostics
349354

350355
- Don't emit bogus dangling diagnostics when ``[[gsl::Owner]]`` and ``[[clang::lifetimebound]]`` are used together (#GH108272).
351356

357+
- The ``-Wreturn-stack-address`` warning now also warns about addresses of
358+
local variables passed to function calls using the ``[[clang::musttail]]``
359+
attribute.
360+
352361
Improvements to Clang's time-trace
353362
----------------------------------
354363

clang/include/clang/Basic/AttrDocs.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,12 @@ return value must be trivially destructible. The calling convention of the
637637
caller and callee must match, and they must not be variadic functions or have
638638
old style K&R C function declarations.
639639

640+
The lifetimes of all local variables and function parameters end immediately
641+
before the call to the function. This means that it is undefined behaviour to
642+
pass a pointer or reference to a local variable to the called function, which
643+
is not the case without the attribute. Clang will emit a warning in common
644+
cases where this happens.
645+
640646
``clang::musttail`` provides assurances that the tail call can be optimized on
641647
all targets, not just one.
642648
}];

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10104,11 +10104,15 @@ def err_lifetimebound_ctor_dtor : Error<
1010410104
// CHECK: returning address/reference of stack memory
1010510105
def warn_ret_stack_addr_ref : Warning<
1010610106
"%select{address of|reference to}0 stack memory associated with "
10107-
"%select{local variable|parameter|compound literal}2 %1 returned">,
10107+
"%select{local variable|parameter|compound literal}2 %1 "
10108+
"%select{returned|passed to musttail function}3">,
1010810109
InGroup<ReturnStackAddress>;
1010910110
def warn_ret_local_temp_addr_ref : Warning<
1011010111
"returning %select{address of|reference to}0 local temporary object">,
1011110112
InGroup<ReturnStackAddress>;
10113+
def warn_musttail_local_temp_addr_ref : Warning<
10114+
"passing %select{address of|reference to}0 local temporary object to musttail function">,
10115+
InGroup<ReturnStackAddress>;
1011210116
def err_ret_local_temp_ref : Error<
1011310117
"returning reference to local temporary object">;
1011410118
def warn_ret_addr_label : Warning<

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6471,8 +6471,10 @@ def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
64716471
def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
64726472
def menqcmd : Flag<["-"], "menqcmd">, Group<m_x86_Features_Group>;
64736473
def mno_enqcmd : Flag<["-"], "mno-enqcmd">, Group<m_x86_Features_Group>;
6474-
def mevex512 : Flag<["-"], "mevex512">, Group<m_x86_Features_Group>;
6475-
def mno_evex512 : Flag<["-"], "mno-evex512">, Group<m_x86_Features_Group>;
6474+
def mevex512 : Flag<["-"], "mevex512">, Group<m_x86_Features_Group>,
6475+
Visibility<[ClangOption, CLOption, FlangOption]>;
6476+
def mno_evex512 : Flag<["-"], "mno-evex512">, Group<m_x86_Features_Group>,
6477+
Visibility<[ClangOption, CLOption, FlangOption]>;
64766478
def mf16c : Flag<["-"], "mf16c">, Group<m_x86_Features_Group>;
64776479
def mno_f16c : Flag<["-"], "mno-f16c">, Group<m_x86_Features_Group>;
64786480
def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>;

clang/include/clang/Frontend/MultiplexConsumer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class MultiplexConsumer : public SemaConsumer {
5353
public:
5454
// Takes ownership of the pointers in C.
5555
MultiplexConsumer(std::vector<std::unique_ptr<ASTConsumer>> C);
56+
MultiplexConsumer(std::unique_ptr<ASTConsumer> C);
5657
~MultiplexConsumer() override;
5758

5859
// ASTConsumer
@@ -80,7 +81,7 @@ class MultiplexConsumer : public SemaConsumer {
8081
void InitializeSema(Sema &S) override;
8182
void ForgetSema() override;
8283

83-
private:
84+
protected:
8485
std::vector<std::unique_ptr<ASTConsumer>> Consumers; // Owns these.
8586
std::unique_ptr<MultiplexASTMutationListener> MutationListener;
8687
std::unique_ptr<MultiplexASTDeserializationListener> DeserializationListener;

clang/include/clang/Interpreter/Interpreter.h

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,9 @@
1414
#ifndef LLVM_CLANG_INTERPRETER_INTERPRETER_H
1515
#define LLVM_CLANG_INTERPRETER_INTERPRETER_H
1616

17-
#include "clang/AST/Decl.h"
1817
#include "clang/AST/GlobalDecl.h"
1918
#include "clang/Interpreter/PartialTranslationUnit.h"
2019
#include "clang/Interpreter/Value.h"
21-
#include "clang/Sema/Ownership.h"
2220

2321
#include "llvm/ADT/DenseMap.h"
2422
#include "llvm/ExecutionEngine/JITSymbol.h"
@@ -38,6 +36,9 @@ class ThreadSafeContext;
3836
namespace clang {
3937

4038
class CompilerInstance;
39+
class CodeGenerator;
40+
class CXXRecordDecl;
41+
class Decl;
4142
class IncrementalExecutor;
4243
class IncrementalParser;
4344

@@ -77,42 +78,45 @@ class IncrementalCompilerBuilder {
7778
llvm::StringRef CudaSDKPath;
7879
};
7980

80-
/// Generate glue code between the Interpreter's built-in runtime and user code.
81-
class RuntimeInterfaceBuilder {
82-
public:
83-
virtual ~RuntimeInterfaceBuilder() = default;
84-
85-
using TransformExprFunction = ExprResult(RuntimeInterfaceBuilder *Builder,
86-
Expr *, ArrayRef<Expr *>);
87-
virtual TransformExprFunction *getPrintValueTransformer() = 0;
88-
};
81+
class IncrementalAction;
82+
class InProcessPrintingASTConsumer;
8983

9084
/// Provides top-level interfaces for incremental compilation and execution.
9185
class Interpreter {
86+
friend class Value;
87+
friend InProcessPrintingASTConsumer;
88+
9289
std::unique_ptr<llvm::orc::ThreadSafeContext> TSCtx;
90+
/// Long-lived, incremental parsing action.
91+
std::unique_ptr<IncrementalAction> Act;
9392
std::unique_ptr<IncrementalParser> IncrParser;
9493
std::unique_ptr<IncrementalExecutor> IncrExecutor;
95-
std::unique_ptr<RuntimeInterfaceBuilder> RuntimeIB;
9694

9795
// An optional parser for CUDA offloading
9896
std::unique_ptr<IncrementalParser> DeviceParser;
9997

98+
/// List containing information about each incrementally parsed piece of code.
99+
std::list<PartialTranslationUnit> PTUs;
100+
100101
unsigned InitPTUSize = 0;
101102

102103
// This member holds the last result of the value printing. It's a class
103104
// member because we might want to access it after more inputs. If no value
104105
// printing happens, it's in an invalid state.
105106
Value LastValue;
106107

107-
// Add a call to an Expr to report its result. We query the function from
108-
// RuntimeInterfaceBuilder once and store it as a function pointer to avoid
109-
// frequent virtual function calls.
110-
RuntimeInterfaceBuilder::TransformExprFunction *AddPrintValueCall = nullptr;
108+
/// When CodeGen is created the first llvm::Module gets cached in many places
109+
/// and we must keep it alive.
110+
std::unique_ptr<llvm::Module> CachedInCodeGenModule;
111+
112+
/// Compiler instance performing the incremental compilation.
113+
std::unique_ptr<CompilerInstance> CI;
111114

112115
protected:
113116
// Derived classes can use an extended interface of the Interpreter.
114-
Interpreter(std::unique_ptr<CompilerInstance> CI, llvm::Error &Err,
115-
std::unique_ptr<llvm::orc::LLJITBuilder> JITBuilder = nullptr);
117+
Interpreter(std::unique_ptr<CompilerInstance> Instance, llvm::Error &Err,
118+
std::unique_ptr<llvm::orc::LLJITBuilder> JITBuilder = nullptr,
119+
std::unique_ptr<clang::ASTConsumer> Consumer = nullptr);
116120

117121
// Create the internal IncrementalExecutor, or re-create it after calling
118122
// ResetExecutor().
@@ -122,15 +126,8 @@ class Interpreter {
122126
// JIT engine. In particular, it doesn't run cleanup or destructors.
123127
void ResetExecutor();
124128

125-
// Lazily construct the RuntimeInterfaceBuilder. The provided instance will be
126-
// used for the entire lifetime of the interpreter. The default implementation
127-
// targets the in-process __clang_Interpreter runtime. Override this to use a
128-
// custom runtime.
129-
virtual std::unique_ptr<RuntimeInterfaceBuilder> FindRuntimeInterface();
130-
131129
public:
132130
virtual ~Interpreter();
133-
134131
static llvm::Expected<std::unique_ptr<Interpreter>>
135132
create(std::unique_ptr<CompilerInstance> CI);
136133
static llvm::Expected<std::unique_ptr<Interpreter>>
@@ -145,7 +142,6 @@ class Interpreter {
145142
llvm::Expected<PartialTranslationUnit &> Parse(llvm::StringRef Code);
146143
llvm::Error Execute(PartialTranslationUnit &T);
147144
llvm::Error ParseAndExecute(llvm::StringRef Code, Value *V = nullptr);
148-
llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD);
149145

150146
/// Undo N previous incremental inputs.
151147
llvm::Error Undo(unsigned N = 1);
@@ -167,8 +163,6 @@ class Interpreter {
167163
llvm::Expected<llvm::orc::ExecutorAddr>
168164
getSymbolAddressFromLinkerName(llvm::StringRef LinkerName) const;
169165

170-
enum InterfaceKind { NoAlloc, WithAlloc, CopyArray, NewTag };
171-
172166
const llvm::SmallVectorImpl<Expr *> &getValuePrintingInfo() const {
173167
return ValuePrintingInfo;
174168
}
@@ -178,7 +172,15 @@ class Interpreter {
178172
private:
179173
size_t getEffectivePTUSize() const;
180174
void markUserCodeStart();
175+
llvm::Expected<Expr *> ExtractValueFromExpr(Expr *E);
176+
llvm::Expected<llvm::orc::ExecutorAddr> CompileDtorCall(CXXRecordDecl *CXXRD);
177+
178+
CodeGenerator *getCodeGen() const;
179+
std::unique_ptr<llvm::Module> GenModule();
180+
PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU);
181181

182+
// A cache for the compiled destructors used for de-allocation of managed
183+
// clang::Values.
182184
llvm::DenseMap<CXXRecordDecl *, llvm::orc::ExecutorAddr> Dtors;
183185

184186
llvm::SmallVector<Expr *, 4> ValuePrintingInfo;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1306,7 +1306,16 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
13061306
return false;
13071307
}
13081308

1309-
// FIXME: CheckArraySize for NumElems?
1309+
// NB: The same check we're using in CheckArraySize()
1310+
if (NumElems.getActiveBits() >
1311+
ConstantArrayType::getMaxSizeBits(S.getASTContext()) ||
1312+
NumElems.ugt(Descriptor::MaxArrayElemBytes / ElemSize.getQuantity())) {
1313+
// FIXME: NoThrow check?
1314+
const SourceInfo &Loc = S.Current->getSource(OpPC);
1315+
S.FFDiag(Loc, diag::note_constexpr_new_too_large)
1316+
<< NumElems.getZExtValue();
1317+
return false;
1318+
}
13101319

13111320
std::optional<PrimType> ElemT = S.getContext().classify(ElemType);
13121321
DynamicAllocator &Allocator = S.getAllocator();

0 commit comments

Comments
 (0)