Skip to content

Commit e553a98

Browse files
author
Jenkins
committed
merge main into amd-staging
Change-Id: Ifd894fe72a12b977fa159bde1b206637f42dffa9
2 parents 9a0cb31 + 7085ac8 commit e553a98

File tree

35 files changed

+1352
-484
lines changed

35 files changed

+1352
-484
lines changed

clang/test/Driver/riscv-profiles.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
// RVA22U64: "-target-feature" "+f"
5858
// RVA22U64: "-target-feature" "+d"
5959
// RVA22U64: "-target-feature" "+c"
60+
// RVA22U64: "-target-feature" "+b"
6061
// RVA22U64: "-target-feature" "+zic64b"
6162
// RVA22U64: "-target-feature" "+zicbom"
6263
// RVA22U64: "-target-feature" "+zicbop"
@@ -83,6 +84,7 @@
8384
// RVA22S64: "-target-feature" "+f"
8485
// RVA22S64: "-target-feature" "+d"
8586
// RVA22S64: "-target-feature" "+c"
87+
// RVA22S64: "-target-feature" "+b"
8688
// RVA22S64: "-target-feature" "+zic64b"
8789
// RVA22S64: "-target-feature" "+zicbom"
8890
// RVA22S64: "-target-feature" "+zicbop"
@@ -118,6 +120,7 @@
118120
// RVA23U64: "-target-feature" "+f"
119121
// RVA23U64: "-target-feature" "+d"
120122
// RVA23U64: "-target-feature" "+c"
123+
// RVA23U64: "-target-feature" "+b"
121124
// RVA23U64: "-target-feature" "+v"
122125
// RVA23U64: "-target-feature" "+zic64b"
123126
// RVA23U64: "-target-feature" "+zicbom"
@@ -156,6 +159,7 @@
156159
// RVA23S64: "-target-feature" "+f"
157160
// RVA23S64: "-target-feature" "+d"
158161
// RVA23S64: "-target-feature" "+c"
162+
// RVA23S64: "-target-feature" "+b"
159163
// RVA23S64: "-target-feature" "+v"
160164
// RVA23S64: "-target-feature" "+h"
161165
// RVA23S64: "-target-feature" "+zic64b"
@@ -217,6 +221,7 @@
217221
// RVB23U64: "-target-feature" "+f"
218222
// RVB23U64: "-target-feature" "+d"
219223
// RVB23U64: "-target-feature" "+c"
224+
// RVB23U64: "-target-feature" "+b"
220225
// RVB23U64: "-target-feature" "+zic64b"
221226
// RVB23U64: "-target-feature" "+zicbom"
222227
// RVB23U64: "-target-feature" "+zicbop"
@@ -249,6 +254,7 @@
249254
// RVB23S64: "-target-feature" "+f"
250255
// RVB23S64: "-target-feature" "+d"
251256
// RVB23S64: "-target-feature" "+c"
257+
// RVB23S64: "-target-feature" "+b"
252258
// RVB23S64: "-target-feature" "+zic64b"
253259
// RVB23S64: "-target-feature" "+zicbom"
254260
// RVB23S64: "-target-feature" "+zicbop"
@@ -290,6 +296,7 @@
290296
// RUN: %clang --target=riscv32 -### -c %s 2>&1 -march=rvm23u32 -menable-experimental-extensions \
291297
// RUN: | FileCheck -check-prefix=RVM23U32 %s
292298
// RVM23U32: "-target-feature" "+m"
299+
// RVM23U32: "-target-feature" "+b"
293300
// RVM23U32: "-target-feature" "+zicbop"
294301
// RVM23U32: "-target-feature" "+zicond"
295302
// RVM23U32: "-target-feature" "+zicsr"
@@ -309,6 +316,7 @@
309316
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+f"
310317
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+d"
311318
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+c"
319+
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+b"
312320
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicbom"
313321
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicbop"
314322
// PROFILE-WITH-ADDITIONAL: "-target-feature" "+zicboz"

compiler-rt/lib/ctx_profile/CtxInstrContextNode.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
//==============================================================================
99
//
1010
// NOTE!
11-
// llvm/lib/ProfileData/CtxInstrContextNode.h and
11+
// llvm/include/llvm/ProfileData/CtxInstrContextNode.h and
1212
// compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
13-
// must be exact copies of each other
13+
// must be exact copies of each other.
1414
//
1515
// compiler-rt creates these objects as part of the instrumentation runtime for
1616
// contextual profiling. LLVM only consumes them to convert a contextual tree
@@ -114,4 +114,4 @@ class ContextNode final {
114114
};
115115
} // namespace ctx_profile
116116
} // namespace llvm
117-
#endif
117+
#endif

lld/test/ELF/aarch64-feature-pac.s

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,14 @@
7676
# PACDYN-NOT: 0x0000000070000001 (AARCH64_BTI_PLT)
7777
# PACDYN-NOT: 0x0000000070000003 (AARCH64_PAC_PLT)
7878

79-
## Turn on PAC entries with the -z pac-plt command line option. There are no
80-
## warnings in this case as the choice to use PAC in PLT entries is orthogonal
81-
## to the choice of using PAC in relocatable objects. The presence of the PAC
82-
## .note.gnu.property is an indication of preference by the relocatable object.
79+
## Turn on PAC entries with the -z pac-plt command line option. For files w/o
80+
## GNU_PROPERTY_AARCH64_FEATURE_1_PAC set in GNU_PROPERTY_AARCH64_FEATURE_1_AND
81+
## property, emit a warning.
82+
83+
# RUN: ld.lld %t.o %t2.o -z pac-plt %t.so -o %tpacplt.exe 2>&1 | FileCheck -DFILE=%t2.o --check-prefix WARN %s
84+
85+
# WARN: warning: [[FILE]]: -z pac-plt: file does not have GNU_PROPERTY_AARCH64_FEATURE_1_PAC property
8386

84-
# RUN: ld.lld %t.o %t2.o -z pac-plt %t.so -o %tpacplt.exe
8587
# RUN: llvm-readelf -n %tpacplt.exe | FileCheck --check-prefix=PACPROP %s
8688
# RUN: llvm-readelf --dynamic-table %tpacplt.exe | FileCheck --check-prefix PACDYN2 %s
8789
# RUN: llvm-objdump --no-print-imm-hex -d --mattr=+v8.3a --no-show-raw-insn %tpacplt.exe | FileCheck --check-prefix PACPLT %s

llvm/docs/NVPTXUsage.rst

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,70 @@ described in the ``s2g.tile`` mode intrinsics above.
599599
For more information, refer PTX ISA
600600
`<https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor>`_.
601601

602+
'``llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.[1-5]d``'
603+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
604+
605+
Syntax:
606+
"""""""
607+
608+
.. code-block:: llvm
609+
610+
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.1d(ptr %tensor_map, i32 %d0, i64 %ch, i1 %flag_ch)
611+
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.2d(..., i32 %d0, i32 %d1, ...)
612+
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.3d(..., i32 %d0, i32 %d1, i32 %d2, ...)
613+
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, ...)
614+
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.5d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, ...)
615+
616+
Overview:
617+
"""""""""
618+
619+
The '``@llvm.nvvm.cp.async.bulk.tensor.prefetch.tile.[1-5]d``' intrinsics
620+
correspond to the ``cp.async.bulk.prefetch.tensor.[1-5]d.L2.global*`` set
621+
of PTX instructions. These instructions initiate an asynchronous prefetch
622+
of tensor data from global memory to the L2 cache. In tile mode, the
623+
multi-dimensional layout of the source tensor is preserved at the destination.
624+
The dimension of the tensor data ranges from 1d to 5d with the coordinates
625+
specified by the ``i32 %d0 ... i32 %d4`` arguments.
626+
627+
* The last argument to these intrinsics is a boolean flag
628+
indicating support for cache_hint. This flag argument must
629+
be a compile-time constant. When set, it indicates a valid
630+
cache_hint (``i64 %ch``) and generates the ``.L2::cache_hint``
631+
variant of the PTX instruction.
632+
633+
For more information, refer PTX ISA
634+
`<https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-cp-async-bulk-prefetch-tensor>`_.
635+
636+
'``llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.[1-5]d``'
637+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
638+
639+
Syntax:
640+
"""""""
641+
642+
.. code-block:: llvm
643+
644+
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.3d(ptr %tensor_map, i32 %d0, i32 %d1, i32 %d2, i16 %im2col0, i64 %ch, i1 %flag_ch)
645+
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.4d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i16 %im2col0, i16 %im2col1, ...)
646+
declare void @llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.5d(..., i32 %d0, i32 %d1, i32 %d2, i32 %d3, i32 %d4, i16 %im2col0, i16 %im2col1, i16 %im2col2, ...)
647+
648+
Overview:
649+
"""""""""
650+
651+
The '``@llvm.nvvm.cp.async.bulk.tensor.prefetch.im2col.[1-5]d``' intrinsics
652+
correspond to the ``cp.async.bulk.prefetch.tensor.[1-5]d.L2.global*`` set
653+
of PTX instructions. These instructions initiate an asynchronous prefetch
654+
of tensor data from global memory to the L2 cache. In im2col mode, some
655+
dimensions of the source tensor are unrolled into a single dimensional
656+
column at the destination. In this mode, the tensor has to be at least
657+
three-dimensional. Along with the tensor coordinates, im2col offsets are
658+
also specified (denoted by ``i16 im2col0...i16 %im2col2``). The number
659+
of im2col offsets is two less than the number of dimensions of the tensor
660+
operation. The last argument to these intrinsics is a boolean flag, with
661+
the same functionality as described in the ``tile`` mode intrinsics above.
662+
663+
For more information, refer PTX ISA
664+
`<https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-cp-async-bulk-prefetch-tensor>`_.
665+
602666
Other Intrinsics
603667
----------------
604668

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,13 @@ class LegalizeRuleSet {
11021102
return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy);
11031103
}
11041104

1105+
LegalizeRuleSet &clampScalar(bool Pred, unsigned TypeIdx, const LLT MinTy,
1106+
const LLT MaxTy) {
1107+
if (!Pred)
1108+
return *this;
1109+
return clampScalar(TypeIdx, MinTy, MaxTy);
1110+
}
1111+
11051112
/// Limit the range of scalar sizes to MinTy and MaxTy.
11061113
LegalizeRuleSet &clampScalarOrElt(unsigned TypeIdx, const LLT MinTy,
11071114
const LLT MaxTy) {

llvm/include/llvm/ExecutionEngine/Orc/Core.h

Lines changed: 2 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
#include "llvm/ADT/IntrusiveRefCntPtr.h"
2020
#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
2121
#include "llvm/ExecutionEngine/JITSymbol.h"
22+
#include "llvm/ExecutionEngine/Orc/CoreContainers.h"
2223
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
24+
#include "llvm/ExecutionEngine/Orc/MaterializationUnit.h"
2325
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
2426
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h"
2527
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
@@ -39,7 +41,6 @@ namespace orc {
3941
// Forward declare some classes.
4042
class AsynchronousSymbolQuery;
4143
class ExecutionSession;
42-
class MaterializationUnit;
4344
class MaterializationResponsibility;
4445
class JITDylib;
4546
class ResourceTracker;
@@ -109,23 +110,6 @@ class ResourceManager {
109110
ResourceKey SrcK) = 0;
110111
};
111112

112-
/// A set of symbol names (represented by SymbolStringPtrs for
113-
// efficiency).
114-
using SymbolNameSet = DenseSet<SymbolStringPtr>;
115-
116-
/// A vector of symbol names.
117-
using SymbolNameVector = std::vector<SymbolStringPtr>;
118-
119-
/// A map from symbol names (as SymbolStringPtrs) to JITSymbols
120-
/// (address/flags pairs).
121-
using SymbolMap = DenseMap<SymbolStringPtr, ExecutorSymbolDef>;
122-
123-
/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
124-
using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;
125-
126-
/// A map from JITDylibs to sets of symbols.
127-
using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;
128-
129113
/// Lookup flags that apply to each dylib in the search order for a lookup.
130114
///
131115
/// If MatchHiddenSymbolsOnly is used (the default) for a given dylib, then
@@ -682,83 +666,6 @@ class MaterializationResponsibility {
682666
SymbolStringPtr InitSymbol;
683667
};
684668

685-
/// A MaterializationUnit represents a set of symbol definitions that can
686-
/// be materialized as a group, or individually discarded (when
687-
/// overriding definitions are encountered).
688-
///
689-
/// MaterializationUnits are used when providing lazy definitions of symbols to
690-
/// JITDylibs. The JITDylib will call materialize when the address of a symbol
691-
/// is requested via the lookup method. The JITDylib will call discard if a
692-
/// stronger definition is added or already present.
693-
class MaterializationUnit {
694-
friend class ExecutionSession;
695-
friend class JITDylib;
696-
697-
public:
698-
static char ID;
699-
700-
struct Interface {
701-
Interface() = default;
702-
Interface(SymbolFlagsMap InitalSymbolFlags, SymbolStringPtr InitSymbol)
703-
: SymbolFlags(std::move(InitalSymbolFlags)),
704-
InitSymbol(std::move(InitSymbol)) {
705-
assert((!this->InitSymbol || this->SymbolFlags.count(this->InitSymbol)) &&
706-
"If set, InitSymbol should appear in InitialSymbolFlags map");
707-
}
708-
709-
SymbolFlagsMap SymbolFlags;
710-
SymbolStringPtr InitSymbol;
711-
};
712-
713-
MaterializationUnit(Interface I)
714-
: SymbolFlags(std::move(I.SymbolFlags)),
715-
InitSymbol(std::move(I.InitSymbol)) {}
716-
virtual ~MaterializationUnit() = default;
717-
718-
/// Return the name of this materialization unit. Useful for debugging
719-
/// output.
720-
virtual StringRef getName() const = 0;
721-
722-
/// Return the set of symbols that this source provides.
723-
const SymbolFlagsMap &getSymbols() const { return SymbolFlags; }
724-
725-
/// Returns the initialization symbol for this MaterializationUnit (if any).
726-
const SymbolStringPtr &getInitializerSymbol() const { return InitSymbol; }
727-
728-
/// Implementations of this method should materialize all symbols
729-
/// in the materialzation unit, except for those that have been
730-
/// previously discarded.
731-
virtual void
732-
materialize(std::unique_ptr<MaterializationResponsibility> R) = 0;
733-
734-
/// Called by JITDylibs to notify MaterializationUnits that the given symbol
735-
/// has been overridden.
736-
void doDiscard(const JITDylib &JD, const SymbolStringPtr &Name) {
737-
SymbolFlags.erase(Name);
738-
if (InitSymbol == Name) {
739-
DEBUG_WITH_TYPE("orc", {
740-
dbgs() << "In " << getName() << ": discarding init symbol \""
741-
<< *Name << "\"\n";
742-
});
743-
InitSymbol = nullptr;
744-
}
745-
discard(JD, std::move(Name));
746-
}
747-
748-
protected:
749-
SymbolFlagsMap SymbolFlags;
750-
SymbolStringPtr InitSymbol;
751-
752-
private:
753-
virtual void anchor();
754-
755-
/// Implementations of this method should discard the given symbol
756-
/// from the source (e.g. if the source is an LLVM IR Module and the
757-
/// symbol is a function, delete the function body or mark it available
758-
/// externally).
759-
virtual void discard(const JITDylib &JD, const SymbolStringPtr &Name) = 0;
760-
};
761-
762669
/// A MaterializationUnit implementation for pre-existing absolute symbols.
763670
///
764671
/// All symbols will be resolved and marked ready as soon as the unit is
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
//===---- CoreContainers.h - Symbol Containers for Core APIs ----*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Symbol container types for core ORC APIs.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H
14+
#define LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H
15+
16+
#include "llvm/ADT/DenseMap.h"
17+
#include "llvm/ADT/DenseSet.h"
18+
#include "llvm/ExecutionEngine/JITSymbol.h"
19+
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h"
20+
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
21+
22+
#include <vector>
23+
24+
namespace llvm::orc {
25+
26+
class JITDylib;
27+
28+
/// A set of symbol names (represented by SymbolStringPtrs for
29+
// efficiency).
30+
using SymbolNameSet = DenseSet<SymbolStringPtr>;
31+
32+
/// A vector of symbol names.
33+
using SymbolNameVector = std::vector<SymbolStringPtr>;
34+
35+
/// A map from symbol names (as SymbolStringPtrs) to JITSymbols
36+
/// (address/flags pairs).
37+
using SymbolMap = DenseMap<SymbolStringPtr, ExecutorSymbolDef>;
38+
39+
/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
40+
using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;
41+
42+
/// A map from JITDylibs to sets of symbols.
43+
using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;
44+
45+
} // End namespace llvm::orc
46+
47+
#endif // LLVM_EXECUTIONENGINE_ORC_CORECONTAINERS_H

0 commit comments

Comments
 (0)