Skip to content

Commit cf1a77e

Browse files
SC llvm team
authored and committed
Merged main:96ef623a7525 into amd-gfx:cf4b971d071e
Local branch amd-gfx cf4b971 Merged main:0a0e06f29145 into amd-gfx:481034665eb6 Remote branch main 96ef623 [AArch64] Cast predicate operand of SVE gather loads/scatter stores to the parameter type of the intrinsic (NFC) (llvm#71289)
2 parents cf4b971 + 96ef623 commit cf1a77e

File tree

10 files changed

+57
-45
lines changed

10 files changed

+57
-45
lines changed

clang-tools-extra/include-cleaner/lib/WalkAST.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "clang/AST/ASTFwd.h"
1212
#include "clang/AST/Decl.h"
1313
#include "clang/AST/DeclCXX.h"
14+
#include "clang/AST/DeclFriend.h"
1415
#include "clang/AST/DeclTemplate.h"
1516
#include "clang/AST/Expr.h"
1617
#include "clang/AST/ExprCXX.h"
@@ -243,6 +244,14 @@ class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
243244
return true;
244245
}
245246

247+
bool VisitFriendDecl(FriendDecl *D) {
248+
// We already visit the TypeLoc properly, but need to special case the decl
249+
// case.
250+
if (auto *FD = D->getFriendDecl())
251+
report(D->getLocation(), FD);
252+
return true;
253+
}
254+
246255
bool VisitConceptReference(const ConceptReference *CR) {
247256
report(CR->getConceptNameLoc(), CR->getFoundDecl());
248257
return true;

clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,5 +550,10 @@ TEST(WalkAST, Concepts) {
550550
// FIXME: Foo should be explicitly referenced.
551551
testWalk("template<typename T> concept Foo = true;", "void func() { ^Foo auto x = 1; }");
552552
}
553+
554+
TEST(WalkAST, FriendDecl) {
555+
testWalk("void $explicit^foo();", "struct Bar { friend void ^foo(); };");
556+
testWalk("struct $explicit^Foo {};", "struct Bar { friend struct ^Foo; };");
557+
}
553558
} // namespace
554559
} // namespace clang::include_cleaner

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9482,13 +9482,6 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
94829482
auto *OverloadedTy =
94839483
llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
94849484

9485-
// At the ACLE level there's only one predicate type, svbool_t, which is
9486-
// mapped to <n x 16 x i1>. However, this might be incompatible with the
9487-
// actual type being loaded. For example, when loading doubles (i64) the
9488-
// predicated should be <n x 2 x i1> instead. At the IR level the type of
9489-
// the predicate and the data being loaded must match. Cast accordingly.
9490-
Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9491-
94929485
Function *F = nullptr;
94939486
if (Ops[1]->getType()->isVectorTy())
94949487
// This is the "vector base, scalar offset" case. In order to uniquely
@@ -9502,6 +9495,16 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
95029495
// intrinsic.
95039496
F = CGM.getIntrinsic(IntID, OverloadedTy);
95049497

9498+
// At the ACLE level there's only one predicate type, svbool_t, which is
9499+
// mapped to <n x 16 x i1>. However, this might be incompatible with the
9500+
// actual type being loaded. For example, when loading doubles (i64) the
9501+
// predicate should be <n x 2 x i1> instead. At the IR level the type of
9502+
// the predicate and the data being loaded must match. Cast to the type
9503+
// expected by the intrinsic. The intrinsic itself should be defined in
9504+
// a way that enforces relations between parameter types.
9505+
Ops[0] = EmitSVEPredicateCast(
9506+
Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9507+
95059508
// Pass 0 when the offset is missing. This can only be applied when using
95069509
// the "vector base" addressing mode for which ACLE allows no offset. The
95079510
// corresponding LLVM IR always requires an offset.
@@ -9566,8 +9569,11 @@ Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
95669569
// mapped to <n x 16 x i1>. However, this might be incompatible with the
95679570
// actual type being stored. For example, when storing doubles (i64) the
95689571
// predicate should be <n x 2 x i1> instead. At the IR level the type of
9569-
// the predicate and the data being stored must match. Cast accordingly.
9570-
Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
9572+
// the predicate and the data being stored must match. Cast to the type
9573+
// expected by the intrinsic. The intrinsic itself should be defined in
9574+
// a way that enforces relations between parameter types.
9575+
Ops[1] = EmitSVEPredicateCast(
9576+
Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
95719577

95729578
// For "vector base, scalar index" scale the index so that it becomes a
95739579
// scalar offset.

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2224,18 +2224,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
22242224
return Visit(const_cast<Expr*>(E));
22252225

22262226
case CK_NoOp: {
2227-
llvm::Value *V = CE->changesVolatileQualification()
2228-
? EmitLoadOfLValue(CE)
2229-
: Visit(const_cast<Expr *>(E));
2230-
if (V) {
2231-
// CK_NoOp can model a pointer qualification conversion, which can remove
2232-
// an array bound and change the IR type.
2233-
// FIXME: Once pointee types are removed from IR, remove this.
2234-
llvm::Type *T = ConvertType(DestTy);
2235-
if (T != V->getType())
2236-
V = Builder.CreateBitCast(V, T);
2237-
}
2238-
return V;
2227+
return CE->changesVolatileQualification() ? EmitLoadOfLValue(CE)
2228+
: Visit(const_cast<Expr *>(E));
22392229
}
22402230

22412231
case CK_BaseToDerived: {

libc/cmake/modules/LLVMLibCObjectRules.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ function(get_nvptx_compile_options output_var gpu_arch)
8989
set(nvptx_options "")
9090
list(APPEND nvptx_options "-march=${gpu_arch}")
9191
list(APPEND nvptx_options "-Wno-unknown-cuda-version")
92+
list(APPEND nvptx_options "SHELL:-mllvm -nvptx-emit-init-fini-kernel=false")
9293
if(${gpu_arch} STREQUAL "sm_35")
9394
list(APPEND nvptx_options "--cuda-feature=+ptx60")
9495
elseif(${gpu_arch} STREQUAL "sm_37")

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 480586
19+
#define LLVM_MAIN_REVISION 480604
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/CodeGen/LiveIntervalUnion.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
#include "llvm/CodeGen/LiveIntervalUnion.h"
1616
#include "llvm/ADT/STLExtras.h"
17-
#include "llvm/ADT/SparseBitVector.h"
1817
#include "llvm/CodeGen/LiveInterval.h"
1918
#include "llvm/CodeGen/TargetRegisterInfo.h"
2019
#include "llvm/Support/raw_ostream.h"

llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
10-
#include "llvm/ADT/SparseBitVector.h"
1110
#include "llvm/ADT/StringMap.h"
1211
#include "llvm/ADT/StringRef.h"
1312
#include "llvm/DebugInfo/PDB/Native/Hash.h"

llvm/lib/Target/X86/X86FlagsCopyLowering.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#include "llvm/ADT/ScopeExit.h"
3131
#include "llvm/ADT/SmallPtrSet.h"
3232
#include "llvm/ADT/SmallVector.h"
33-
#include "llvm/ADT/SparseBitVector.h"
3433
#include "llvm/ADT/Statistic.h"
3534
#include "llvm/CodeGen/MachineBasicBlock.h"
3635
#include "llvm/CodeGen/MachineConstantPool.h"

mlir/include/mlir/Dialect/Tensor/IR/TensorBase.td

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,31 +18,35 @@ def Tensor_Dialect : Dialect {
1818
let description = [{
1919
The `tensor` dialect is intended to hold core tensor creation and
2020
manipulation ops, which are not strongly associated with any particular
21-
other dialect or domain abstraction. The primary smoke test of this is ops
22-
that make sense for any tensor element type.
23-
24-
We leave it to other dialects to hold the vast swath of possible
25-
computations one might want to do on a tensor.
26-
27-
The `tensor` type is (for better or for worse) used to represent all kinds
28-
of things, and supports an open-ended set of element types. Examples:
21+
other dialect or domain abstraction. The aim for ops in this dialect is
22+
that they make sense for any tensor element type. When this is not the
23+
case, the op is left to live in other dialects. Examples of element types
24+
that could be supported by the `tensor` dialect include:
2925

3026
- representing large, dense aggregations of primitive types, suitable for
3127
high-performance numerical computing.
32-
- representing shapes in the `shape` dialect, which consist of small
33-
1D tensors of `index` data type.
28+
- representing shapes in the `shape` dialect, which consist of small 1D
29+
tensors of `index` data type.
3430
- representing aggregations of strings or “variant” types.
35-
- representing large, sparse aggregations of primitive types, suitable
36-
for high-performance numerical computing.
37-
38-
Thus, for the `tensor` dialect, we prefer for now to constrain the
39-
scope as much as possible. The expectation is that at some point
40-
in the future, the `tensor` dialect’s scope may be broadened through a
41-
careful discussion of the tradeoffs.
42-
43-
The `tensor` type is actually a builtin type (it lives in the builtin
44-
dialect), and does not live in this dialect.
31+
- representing large, sparse aggregations of primitive types, suitable for
32+
high-performance numerical computing.
4533

34+
Because of this broad element type support and because of the existence of
35+
more dedicated dialects, such as the `sparse_tensor` and `linalg` dialects,
36+
we prefer for now to keep the `tensor` dialect as small as possible. The
37+
expectation is that at some point in the future, the `tensor` dialect’s
38+
scope may be broadened through a careful discussion of the tradeoffs.
39+
40+
On the `tensor` type itself, note that it is actually a builtin type (it
41+
lives in the builtin dialect), and does not live in this dialect.
42+
Furthermore, a `tensor` is an immutable object. For example, this means
43+
that a copy will always be made of the `tensor` object when it is passed to
44+
the `dest` operand used by some ops in this dialect. As an optimization,
45+
an implementation can eliminate these copies during lowering when they
46+
are redundant and perform in-place mutation, see the [Destination-Passing
47+
Style](
48+
https://mlir.llvm.org/docs/Bufferization/#destination-passing-style)
49+
documentation for more information.
4650
}];
4751

4852
let hasCanonicalizer = 1;

0 commit comments

Comments
 (0)