
Commit a88bc31

SC llvm team authored and committed
Merged main:fa9b1be45088dce1e4b602d451f118128b94237b into amd-gfx:fb552d673f5b
Local branch amd-gfx fb552d6 Merged main:e578314c049bb9ae6dc3983db5cf27513e29517b into amd-gfx:f40f282b2831
Remote branch main fa9b1be [ThinLTO]Mark referencers of local ifunc not eligible for import (llvm#92431)
2 parents fb552d6 + fa9b1be commit a88bc31

14 files changed: +350 −245 lines changed

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 2 additions & 1 deletion
@@ -137,7 +137,8 @@ enum TypeEvaluationKind {
   SANITIZER_CHECK(SubOverflow, sub_overflow, 0)                              \
   SANITIZER_CHECK(TypeMismatch, type_mismatch, 1)                            \
   SANITIZER_CHECK(AlignmentAssumption, alignment_assumption, 0)              \
-  SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0)
+  SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0)            \
+  SANITIZER_CHECK(BoundsSafety, bounds_safety, 0)
 
 enum SanitizerHandler {
 #define SANITIZER_CHECK(Enum, Name, Version) Enum,
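
This list is an X-macro: each SANITIZER_CHECK entry expands under whatever definition of the macro is in scope at the point of use, so appending BoundsSafety to the list automatically grows the SanitizerHandler enum shown in the context lines. A minimal, self-contained sketch of the pattern, using only the two entries visible in this diff (LIST_SANITIZER_CHECKS stands in for Clang's full list):

// X-macro list: expansion is deferred to the point of use.
#define LIST_SANITIZER_CHECKS                                         \
  SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0)     \
  SANITIZER_CHECK(BoundsSafety, bounds_safety, 0)

// One expansion: generate an enumerator per entry.
enum SanitizerHandler {
#define SANITIZER_CHECK(Enum, Name, Version) Enum,
  LIST_SANITIZER_CHECKS
#undef SANITIZER_CHECK
};
// SanitizerHandler now has VLABoundNotPositive and BoundsSafety; other
// expansions of the same list can emit handler names, versions, etc.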

clang/test/SemaCXX/cxx20-ctad-type-alias.cpp

Lines changed: 5 additions & 5 deletions
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -Wno-c++11-narrowing -Wno-literal-conversion -std=c++20 -verify %s
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-unknown-linux -Wno-c++11-narrowing -Wno-literal-conversion -std=c++20 -verify %s
 
 namespace test1 {
 template <typename T>
@@ -74,7 +74,7 @@ struct Foo {
 template <typename T>
 using AF = Foo<T, 1>;
 
-AF b{0};
+AF b{0};
 } // namespace test6
 
 namespace test7 {
@@ -86,8 +86,8 @@ struct Foo {
 template <typename U>
 using AF1 = Foo<U>;
 template <typename K>
-using AF2 = AF1<K>;
-AF2 b = 1;
+using AF2 = AF1<K>;
+AF2 b = 1;
 } // namespace test7
 
 namespace test8 {
@@ -149,7 +149,7 @@ namespace test12 {
 template<typename X>
 struct Foo {
   template<typename K>
-  struct Bar {
+  struct Bar {
     Bar(K);
   };

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 498973
+#define LLVM_MAIN_REVISION 498980
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/TableGen/Record.h

Lines changed: 1 addition & 1 deletion
@@ -2098,7 +2098,7 @@ class RecordKeeper {
 /// Sorting predicate to sort record pointers by name.
 struct LessRecord {
   bool operator()(const Record *Rec1, const Record *Rec2) const {
-    return StringRef(Rec1->getName()).compare_numeric(Rec2->getName()) < 0;
+    return Rec1->getName().compare_numeric(Rec2->getName()) < 0;
   }
 };

llvm/lib/Analysis/ModuleSummaryAnalysis.cpp

Lines changed: 29 additions & 13 deletions
@@ -95,9 +95,12 @@ extern cl::opt<unsigned> MaxNumVTableAnnotations;
 // global vars at all. When importing function we aren't interested if any
 // instruction in it takes an address of any basic block, because instruction
 // can only take an address of basic block located in the same function.
+// Set `RefLocalLinkageIFunc` to true if the analyzed value references a
+// local-linkage ifunc.
 static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
                          SetVector<ValueInfo, std::vector<ValueInfo>> &RefEdges,
-                         SmallPtrSet<const User *, 8> &Visited) {
+                         SmallPtrSet<const User *, 8> &Visited,
+                         bool &RefLocalLinkageIFunc) {
   bool HasBlockAddress = false;
   SmallVector<const User *, 32> Worklist;
   if (Visited.insert(CurUser).second)
@@ -119,8 +122,18 @@ static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
       // We have a reference to a global value. This should be added to
       // the reference set unless it is a callee. Callees are handled
      // specially by WriteFunction and are added to a separate list.
-      if (!(CB && CB->isCallee(&OI)))
+      if (!(CB && CB->isCallee(&OI))) {
+        // If an ifunc has local linkage, do not add it into ref edges, and
+        // sets `RefLocalLinkageIFunc` to true. The referencer is not eligible
+        // for import. An ifunc doesn't have summary and ThinLTO cannot
+        // promote it; importing the referencer may cause linkage errors.
+        if (auto *GI = dyn_cast_if_present<GlobalIFunc>(GV);
+            GI && GI->hasLocalLinkage()) {
+          RefLocalLinkageIFunc = true;
+          continue;
+        }
         RefEdges.insert(Index.getOrInsertValueInfo(GV));
+      }
       continue;
     }
     if (Visited.insert(Operand).second)
@@ -313,7 +326,8 @@ static void computeFunctionSummary(
 
   // Add personality function, prefix data and prologue data to function's ref
   // list.
-  findRefEdges(Index, &F, RefEdges, Visited);
+  bool HasLocalIFuncCallOrRef = false;
+  findRefEdges(Index, &F, RefEdges, Visited, HasLocalIFuncCallOrRef);
   std::vector<const Instruction *> NonVolatileLoads;
   std::vector<const Instruction *> NonVolatileStores;
 
@@ -326,7 +340,6 @@
 
   bool HasInlineAsmMaybeReferencingInternal = false;
   bool HasIndirBranchToBlockAddress = false;
-  bool HasIFuncCall = false;
   bool HasUnknownCall = false;
   bool MayThrow = false;
   for (const BasicBlock &BB : F) {
@@ -372,11 +385,11 @@
         // of calling it we should add GV to RefEdges directly.
         RefEdges.insert(Index.getOrInsertValueInfo(GV));
       else if (auto *U = dyn_cast<User>(Stored))
-        findRefEdges(Index, U, RefEdges, Visited);
+        findRefEdges(Index, U, RefEdges, Visited, HasLocalIFuncCallOrRef);
       continue;
     }
   }
-  findRefEdges(Index, &I, RefEdges, Visited);
+  findRefEdges(Index, &I, RefEdges, Visited, HasLocalIFuncCallOrRef);
   const auto *CB = dyn_cast<CallBase>(&I);
   if (!CB) {
     if (I.mayThrow())
@@ -450,7 +463,7 @@
       // Non-local ifunc is not cloned and does not have the issue.
       if (auto *GI = dyn_cast_if_present<GlobalIFunc>(CalledValue))
         if (GI->hasLocalLinkage())
-          HasIFuncCall = true;
+          HasLocalIFuncCallOrRef = true;
       // Skip inline assembly calls.
       if (CI && CI->isInlineAsm())
        continue;
@@ -555,7 +568,7 @@
                    SmallPtrSet<const User *, 8> &Cache) {
     for (const auto *I : Instrs) {
       Cache.erase(I);
-      findRefEdges(Index, I, Edges, Cache);
+      findRefEdges(Index, I, Edges, Cache, HasLocalIFuncCallOrRef);
     }
   };
 
@@ -631,9 +644,9 @@
 #endif
 
   bool NonRenamableLocal = isNonRenamableLocal(F);
-  bool NotEligibleForImport = NonRenamableLocal ||
-                              HasInlineAsmMaybeReferencingInternal ||
-                              HasIndirBranchToBlockAddress || HasIFuncCall;
+  bool NotEligibleForImport =
+      NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
+      HasIndirBranchToBlockAddress || HasLocalIFuncCallOrRef;
   GlobalValueSummary::GVFlags Flags(
       F.getLinkage(), F.getVisibility(), NotEligibleForImport,
       /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable(),
@@ -787,7 +800,10 @@ static void computeVariableSummary(ModuleSummaryIndex &Index,
                                    SmallVectorImpl<MDNode *> &Types) {
   SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges;
   SmallPtrSet<const User *, 8> Visited;
-  bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
+  bool RefLocalIFunc = false;
+  bool HasBlockAddress =
+      findRefEdges(Index, &V, RefEdges, Visited, RefLocalIFunc);
+  const bool NotEligibleForImport = (HasBlockAddress || RefLocalIFunc);
   bool NonRenamableLocal = isNonRenamableLocal(V);
   GlobalValueSummary::GVFlags Flags(
       V.getLinkage(), V.getVisibility(), NonRenamableLocal,
@@ -821,7 +837,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index,
                      RefEdges.takeVector());
   if (NonRenamableLocal)
     CantBePromoted.insert(V.getGUID());
-  if (HasBlockAddress)
+  if (NotEligibleForImport)
     GVarSummary->setNotEligibleToImport();
   if (!VTableFuncs.empty())
     GVarSummary->setVTableFuncs(VTableFuncs);
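
For context on what the new flag guards against: a local-linkage ifunc has no ThinLTO summary and cannot be renamed or promoted, so any function or variable that references one must stay in its defining module. A hypothetical C++ translation unit showing the pattern (the names and the ELF-specific ifunc attribute usage are illustrative, not taken from this commit):

// Two private implementations selected at load time.
static int impl_generic(int x) { return x + 1; }
static int impl_avx(int x) { return x + 2; }

// Ifunc resolver: returns the implementation to bind.
static void *resolve_dispatch(void) {
  return __builtin_cpu_supports("avx") ? (void *)impl_avx
                                       : (void *)impl_generic;
}

// Local-linkage ifunc: invisible outside this module, no summary entry.
static int dispatch(int) __attribute__((ifunc("resolve_dispatch")));

int caller(int x) {
  // computeFunctionSummary now sets HasLocalIFuncCallOrRef for `caller`,
  // marking it not eligible for import: if `caller` were imported into
  // another module, its reference to `dispatch` could not resolve there.
  return dispatch(x);
}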

llvm/lib/TableGen/Record.cpp

Lines changed: 1 addition & 3 deletions
@@ -3251,9 +3251,7 @@ std::vector<Record *> RecordKeeper::getAllDerivedDefinitions(
     Defs.push_back(OneDef.second.get());
   }
 
-  llvm::sort(Defs, [](Record *LHS, Record *RHS) {
-    return LHS->getName().compare_numeric(RHS->getName()) < 0;
-  });
+  llvm::sort(Defs, LessRecord());
 
   return Defs;
 }
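
Both TableGen changes rely on StringRef::compare_numeric, which compares embedded digit runs by numeric value rather than character by character, so the deleted lambda was equivalent to the existing LessRecord predicate. A small standalone sketch of the ordering it produces (the record-like names are made up):

#include "llvm/ADT/StringRef.h"
#include <algorithm>
#include <vector>

int main() {
  std::vector<llvm::StringRef> Names = {"REG10", "REG2", "REG1"};
  // Same comparison LessRecord delegates to: numbers compare by value,
  // so "REG2" sorts before "REG10"; plain lexicographic order would
  // put "REG10" first.
  std::sort(Names.begin(), Names.end(),
            [](llvm::StringRef A, llvm::StringRef B) {
              return A.compare_numeric(B) < 0;
            });
  // Names is now {"REG1", "REG2", "REG10"}.
  return 0;
}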

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 70 additions & 30 deletions
@@ -544,6 +544,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (!Subtarget.is64Bit())
     setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
 
+  if (Subtarget.is64Bit() && Subtarget.hasAVX()) {
+    // All CPUs supporting AVX will atomically load/store aligned 128-bit
+    // values, so we can emit [V]MOVAPS/[V]MOVDQA.
+    setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
+    setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+  }
+
   if (Subtarget.canUseCMPXCHG16B())
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
 
@@ -30415,32 +30422,40 @@
 X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   Type *MemType = SI->getValueOperand()->getType();
 
-  bool NoImplicitFloatOps =
-      SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
-  if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-      (Subtarget.hasSSE1() || Subtarget.hasX87()))
-    return AtomicExpansionKind::None;
+  if (!SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
+      !Subtarget.useSoftFloat()) {
+    if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+        (Subtarget.hasSSE1() || Subtarget.hasX87()))
+      return AtomicExpansionKind::None;
+
+    if (MemType->getPrimitiveSizeInBits() == 128 && Subtarget.is64Bit() &&
+        Subtarget.hasAVX())
+      return AtomicExpansionKind::None;
+  }
 
   return needsCmpXchgNb(MemType) ? AtomicExpansionKind::Expand
                                  : AtomicExpansionKind::None;
 }
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
-// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
 TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   Type *MemType = LI->getType();
 
-  // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
-  // can use movq to do the load. If we have X87 we can load into an 80-bit
-  // X87 register and store it to a stack temporary.
-  bool NoImplicitFloatOps =
-      LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
-  if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-      (Subtarget.hasSSE1() || Subtarget.hasX87()))
-    return AtomicExpansionKind::None;
+  if (!LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
+      !Subtarget.useSoftFloat()) {
+    // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we
+    // can use movq to do the load. If we have X87 we can load into an 80-bit
+    // X87 register and store it to a stack temporary.
+    if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+        (Subtarget.hasSSE1() || Subtarget.hasX87()))
+      return AtomicExpansionKind::None;
+
+    // If this is a 128-bit load with AVX, 128-bit SSE loads/stores are atomic.
+    if (MemType->getPrimitiveSizeInBits() == 128 && Subtarget.is64Bit() &&
+        Subtarget.hasAVX())
+      return AtomicExpansionKind::None;
+  }
 
   return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
                                  : AtomicExpansionKind::None;
@@ -31683,14 +31698,21 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
   if (!IsSeqCst && IsTypeLegal)
     return Op;
 
-  if (VT == MVT::i64 && !IsTypeLegal) {
+  if (!IsTypeLegal && !Subtarget.useSoftFloat() &&
+      !DAG.getMachineFunction().getFunction().hasFnAttribute(
+          Attribute::NoImplicitFloat)) {
+    SDValue Chain;
+    // For illegal i128 atomic_store, when AVX is enabled, we can simply emit a
+    // vector store.
+    if (VT == MVT::i128 && Subtarget.is64Bit() && Subtarget.hasAVX()) {
+      SDValue VecVal = DAG.getBitcast(MVT::v2i64, Node->getVal());
+      Chain = DAG.getStore(Node->getChain(), dl, VecVal, Node->getBasePtr(),
+                           Node->getMemOperand());
+    }
+
     // For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
     // is enabled.
-    bool NoImplicitFloatOps =
-        DAG.getMachineFunction().getFunction().hasFnAttribute(
-            Attribute::NoImplicitFloat);
-    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
-      SDValue Chain;
+    if (VT == MVT::i64) {
       if (Subtarget.hasSSE1()) {
         SDValue SclToVec =
             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal());
@@ -31722,15 +31744,15 @@
         DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
                                 StoreOps, MVT::i64, Node->getMemOperand());
     }
+    }
 
-      if (Chain) {
-        // If this is a sequentially consistent store, also emit an appropriate
-        // barrier.
-        if (IsSeqCst)
-          Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
+    if (Chain) {
+      // If this is a sequentially consistent store, also emit an appropriate
+      // barrier.
+      if (IsSeqCst)
+        Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
 
-        return Chain;
-      }
+      return Chain;
     }
   }
 
@@ -33303,12 +33325,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::ATOMIC_LOAD: {
-    assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+    assert(
+        (N->getValueType(0) == MVT::i64 || N->getValueType(0) == MVT::i128) &&
+        "Unexpected VT!");
     bool NoImplicitFloatOps =
         DAG.getMachineFunction().getFunction().hasFnAttribute(
             Attribute::NoImplicitFloat);
     if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
       auto *Node = cast<AtomicSDNode>(N);
+
+      if (N->getValueType(0) == MVT::i128) {
+        if (Subtarget.is64Bit() && Subtarget.hasAVX()) {
+          SDValue Ld = DAG.getLoad(MVT::v2i64, dl, Node->getChain(),
+                                   Node->getBasePtr(), Node->getMemOperand());
+          SDValue ResL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                     DAG.getIntPtrConstant(0, dl));
+          SDValue ResH = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                     DAG.getIntPtrConstant(1, dl));
+          Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0),
+                                        {ResL, ResH}));
+          Results.push_back(Ld.getValue(1));
+          return;
+        }
+        break;
+      }
       if (Subtarget.hasSSE1()) {
         // Use a VZEXT_LOAD which will be selected as MOVQ or XORPS+MOVLPS.
        // Then extract the lower 64-bits.
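
At the source level, the effect of these hooks is that a naturally aligned 16-byte atomic on an AVX-capable x86-64 target can compile to a single 16-byte vector move instead of a lock cmpxchg16b loop. A hedged illustration (whether the accesses are inlined like this depends on the compiler, flags such as -mavx, and the C++ runtime; the snippet is not part of the commit):

#include <atomic>

// 16 bytes, naturally aligned; lock-free on x86-64 with cmpxchg16b/AVX.
std::atomic<__int128> Counter{};

__int128 read_relaxed() {
  // Previously lowered through a lock cmpxchg16b loop; with this change
  // an AVX build can emit a single vmovdqa/vmovaps load.
  return Counter.load(std::memory_order_relaxed);
}

void write_relaxed(__int128 v) {
  // Likewise a single 16-byte vector store (a fence is added only for
  // seq_cst stores).
  Counter.store(v, std::memory_order_relaxed);
}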

llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll

Lines changed: 3 additions & 28 deletions
@@ -28,22 +28,7 @@ define void @store_fp128(ptr %fptr, fp128 %v) {
 ;
 ; X64-AVX-LABEL: store_fp128:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    pushq %rbx
-; X64-AVX-NEXT:    .cfi_def_cfa_offset 16
-; X64-AVX-NEXT:    .cfi_offset %rbx, -16
-; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
-; X64-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
-; X64-AVX-NEXT:    movq (%rdi), %rax
-; X64-AVX-NEXT:    movq 8(%rdi), %rdx
-; X64-AVX-NEXT:    .p2align 4, 0x90
-; X64-AVX-NEXT:  .LBB0_1: # %atomicrmw.start
-; X64-AVX-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-AVX-NEXT:    lock cmpxchg16b (%rdi)
-; X64-AVX-NEXT:    jne .LBB0_1
-; X64-AVX-NEXT:  # %bb.2: # %atomicrmw.end
-; X64-AVX-NEXT:    popq %rbx
-; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
 ; X64-AVX-NEXT:    retq
   store atomic fp128 %v, ptr %fptr unordered, align 16
   ret void
@@ -69,19 +54,9 @@ define fp128 @load_fp128(ptr %fptr) {
 ;
 ; X64-AVX-LABEL: load_fp128:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    pushq %rbx
-; X64-AVX-NEXT:    .cfi_def_cfa_offset 16
-; X64-AVX-NEXT:    .cfi_offset %rbx, -16
-; X64-AVX-NEXT:    xorl %eax, %eax
-; X64-AVX-NEXT:    xorl %edx, %edx
-; X64-AVX-NEXT:    xorl %ecx, %ecx
-; X64-AVX-NEXT:    xorl %ebx, %ebx
-; X64-AVX-NEXT:    lock cmpxchg16b (%rdi)
-; X64-AVX-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
-; X64-AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT:    vmovaps (%rdi), %xmm0
+; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
-; X64-AVX-NEXT:    popq %rbx
-; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX-NEXT:    retq
   %v = load atomic fp128, ptr %fptr unordered, align 16
   ret fp128 %v

llvm/test/CodeGen/X86/atomic-non-integer.ll

Lines changed: 0 additions & 1 deletion
@@ -131,7 +131,6 @@ define void @store_double(ptr %fptr, double %v) {
   ret void
 }
 
-
 define half @load_half(ptr %fptr) {
 ; X86-SSE1-LABEL: load_half:
 ; X86-SSE1:       # %bb.0:
