Skip to content

Commit 378c205

Browse files
SC llvm teamSC llvm team
authored andcommitted
Merged main:7a28a5b3fee6c78ad59af79a3d03c00db153c49f into amd-gfx:6434034d613e
Local branch amd-gfx 6434034 Merged main:7429950d840b8fec3d9a48d00e612a3240c2be83 into amd-gfx:49c66abb8fcb Remote branch main 7a28a5b [clang][Sema] Fix crash when diagnosing candidates with parameter packs (llvm#93079)
2 parents 6434034 + 7a28a5b commit 378c205

File tree

61 files changed

+5412
-26
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+5412
-26
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4403,6 +4403,7 @@ immediately after the name being declared.
44034403
For example, this applies the GNU ``unused`` attribute to ``a`` and ``f``, and
44044404
also applies the GNU ``noreturn`` attribute to ``f``.
44054405
4406+
Examples:
44064407
.. code-block:: c++
44074408
44084409
[[gnu::unused]] int a, f [[gnu::noreturn]] ();
@@ -4412,6 +4413,42 @@ Target-Specific Extensions
44124413
44134414
Clang supports some language features conditionally on some targets.
44144415
4416+
AMDGPU Language Extensions
4417+
--------------------------
4418+
4419+
__builtin_amdgcn_fence
4420+
^^^^^^^^^^^^^^^^^^^^^^
4421+
4422+
``__builtin_amdgcn_fence`` emits a fence. It takes the following arguments:
4423+
4424+
* ``unsigned`` atomic ordering, e.g. ``__ATOMIC_ACQUIRE``
4425+
* ``const char *`` synchronization scope, e.g. ``"workgroup"``
4426+
* Zero or more ``const char *`` address space names.
4427+
4428+
Each address space argument must be one of the following string literals:
4429+
4430+
* ``"local"``
4431+
* ``"global"``
4432+
4433+
If one or more address space names are provided, the code generator will attempt
4434+
to emit potentially faster instructions that order access to at least those
4435+
address spaces.
4436+
Emitting such instructions may not always be possible and the compiler is free
4437+
to fence more aggressively.
4438+
4439+
If no address space names are provided, all address spaces are fenced.
4440+
4441+
.. code-block:: c++
4442+
4443+
// Fence all address spaces.
4444+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
4445+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
4446+
4447+
// Fence only requested address spaces.
4448+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local");
4449+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global");
4450+
4451+
44154452
ARM/AArch64 Language Extensions
44164453
-------------------------------
44174454
@@ -5602,4 +5639,4 @@ Compiling different TUs depending on these flags (including use of
56025639
``std::hardware_constructive_interference`` or
56035640
``std::hardware_destructive_interference``) with different compilers, macro
56045641
definitions, or architecture flags will lead to ODR violations and should be
5605-
avoided.
5642+
avoided.

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,6 @@ Bug Fixes to C++ Support
734734
from being explicitly specialized for a given implicit instantiation of the class template.
735735
- Fixed a crash when ``this`` is used in a dependent class scope function template specialization
736736
that instantiates to a static member function.
737-
738737
- Fix crash when inheriting from a cv-qualified type. Fixes #GH35603
739738
- Fix a crash when the using enum declaration uses an anonymous enumeration. Fixes (#GH86790).
740739
- Handled an edge case in ``getFullyPackExpandedSize`` so that we now avoid a false-positive diagnostic. (#GH84220)
@@ -796,6 +795,8 @@ Bug Fixes to C++ Support
796795
Fixes (#GH91308).
797796
- Fix a crash caused by a regression in the handling of ``source_location``
798797
in dependent contexts. Fixes (#GH92680).
798+
- Fixed a crash when diagnosing failed conversions involving template parameter
799+
packs. (#GH93076)
799800

800801
Bug Fixes to AST Handling
801802
^^^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ BUILTIN(__builtin_amdgcn_sched_group_barrier, "vIiIiIi", "n")
6868
BUILTIN(__builtin_amdgcn_iglp_opt, "vIi", "n")
6969
BUILTIN(__builtin_amdgcn_s_dcache_inv, "v", "n")
7070
BUILTIN(__builtin_amdgcn_buffer_wbinvl1, "v", "n")
71-
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
71+
BUILTIN(__builtin_amdgcn_fence, "vUicC*.", "n")
7272
BUILTIN(__builtin_amdgcn_groupstaticsize, "Ui", "n")
7373
BUILTIN(__builtin_amdgcn_wavefrontsize, "Ui", "nc")
7474

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
#include "llvm/IR/IntrinsicsX86.h"
5858
#include "llvm/IR/MDBuilder.h"
5959
#include "llvm/IR/MatrixBuilder.h"
60+
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
6061
#include "llvm/Support/ConvertUTF.h"
6162
#include "llvm/Support/MathExtras.h"
6263
#include "llvm/Support/ScopedPrinter.h"
@@ -18327,6 +18328,29 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1832718328
return nullptr;
1832818329
}
1832918330

18331+
void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
18332+
const CallExpr *E) {
18333+
constexpr const char *Tag = "amdgpu-as";
18334+
18335+
LLVMContext &Ctx = Inst->getContext();
18336+
SmallVector<MMRAMetadata::TagT, 3> MMRAs;
18337+
for (unsigned K = 2; K < E->getNumArgs(); ++K) {
18338+
llvm::Value *V = EmitScalarExpr(E->getArg(K));
18339+
StringRef AS;
18340+
if (llvm::getConstantStringInfo(V, AS)) {
18341+
MMRAs.push_back({Tag, AS});
18342+
// TODO: Delete the resulting unused constant?
18343+
continue;
18344+
}
18345+
CGM.Error(E->getExprLoc(),
18346+
"expected an address space name as a string literal");
18347+
}
18348+
18349+
llvm::sort(MMRAs);
18350+
MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
18351+
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
18352+
}
18353+
1833018354
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1833118355
const CallExpr *E) {
1833218356
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -18997,7 +19021,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1899719021
case AMDGPU::BI__builtin_amdgcn_fence: {
1899819022
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
1899919023
EmitScalarExpr(E->getArg(1)), AO, SSID);
19000-
return Builder.CreateFence(AO, SSID);
19024+
FenceInst *Fence = Builder.CreateFence(AO, SSID);
19025+
if (E->getNumArgs() > 2)
19026+
AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
19027+
return Fence;
1900119028
}
1900219029
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
1900319030
case AMDGPU::BI__builtin_amdgcn_atomic_inc64:

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4635,6 +4635,9 @@ class CodeGenFunction : public CodeGenTypeCache {
46354635
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
46364636
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
46374637
ReturnValueSlot ReturnValue);
4638+
4639+
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
4640+
const CallExpr *E);
46384641
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
46394642
llvm::AtomicOrdering &AO,
46404643
llvm::SyncScope::ID &SSID);

clang/lib/Sema/SemaOverload.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "clang/AST/ASTContext.h"
1414
#include "clang/AST/ASTLambda.h"
1515
#include "clang/AST/CXXInheritance.h"
16+
#include "clang/AST/Decl.h"
1617
#include "clang/AST/DeclCXX.h"
1718
#include "clang/AST/DeclObjC.h"
1819
#include "clang/AST/DependenceFlags.h"
@@ -11301,8 +11302,16 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
1130111302
Expr *FromExpr = Conv.Bad.FromExpr;
1130211303
QualType FromTy = Conv.Bad.getFromType();
1130311304
QualType ToTy = Conv.Bad.getToType();
11304-
SourceRange ToParamRange =
11305-
!isObjectArgument ? Fn->getParamDecl(I)->getSourceRange() : SourceRange();
11305+
SourceRange ToParamRange;
11306+
11307+
// FIXME: In presence of parameter packs we can't determine parameter range
11308+
// reliably, as we don't have access to instantiation.
11309+
bool HasParamPack =
11310+
llvm::any_of(Fn->parameters().take_front(I), [](const ParmVarDecl *Parm) {
11311+
return Parm->isParameterPack();
11312+
});
11313+
if (!isObjectArgument && !HasParamPack)
11314+
ToParamRange = Fn->getParamDecl(I)->getSourceRange();
1130611315

1130711316
if (FromTy == S.Context.OverloadTy) {
1130811317
assert(FromExpr && "overload set argument came from implicit argument?");
Lines changed: 96 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,111 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
12
// REQUIRES: amdgpu-registered-target
23
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
3-
// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
4+
// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
45

6+
// CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv(
7+
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst
10+
// CHECK-NEXT: fence syncscope("agent") acquire
11+
// CHECK-NEXT: fence seq_cst
12+
// CHECK-NEXT: fence syncscope("agent") acq_rel
13+
// CHECK-NEXT: fence syncscope("workgroup") release
14+
// CHECK-NEXT: ret void
15+
//
516
void test_memory_fence_success() {
6-
// CHECK-LABEL: test_memory_fence_success
717

8-
// CHECK: fence syncscope("workgroup") seq_cst
918
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
1019

11-
// CHECK: fence syncscope("agent") acquire
1220
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
1321

14-
// CHECK: fence seq_cst
1522
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
1623

17-
// CHECK: fence syncscope("agent") acq_rel
1824
__builtin_amdgcn_fence(4, "agent");
1925

20-
// CHECK: fence syncscope("workgroup") release
2126
__builtin_amdgcn_fence(3, "workgroup");
2227
}
28+
29+
// CHECK-LABEL: define dso_local void @_Z10test_localv(
30+
// CHECK-SAME: ) #[[ATTR0]] {
31+
// CHECK-NEXT: entry:
32+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
33+
// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
34+
// CHECK-NEXT: fence seq_cst, !mmra [[META3]]
35+
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
36+
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
37+
// CHECK-NEXT: ret void
38+
//
39+
void test_local() {
40+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
41+
42+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "local");
43+
44+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "local");
45+
46+
__builtin_amdgcn_fence(4, "agent", "local");
47+
48+
__builtin_amdgcn_fence(3, "workgroup", "local");
49+
}
50+
51+
52+
// CHECK-LABEL: define dso_local void @_Z11test_globalv(
53+
// CHECK-SAME: ) #[[ATTR0]] {
54+
// CHECK-NEXT: entry:
55+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
56+
// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META4]]
57+
// CHECK-NEXT: fence seq_cst, !mmra [[META4]]
58+
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
59+
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
60+
// CHECK-NEXT: ret void
61+
//
62+
void test_global() {
63+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
64+
65+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "global");
66+
67+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "global");
68+
69+
__builtin_amdgcn_fence(4, "agent", "global");
70+
71+
__builtin_amdgcn_fence(3, "workgroup", "global");
72+
}
73+
74+
// CHECK-LABEL: define dso_local void @_Z10test_imagev(
75+
// CHECK-SAME: ) #[[ATTR0]] {
76+
// CHECK-NEXT: entry:
77+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
78+
// CHECK-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
79+
// CHECK-NEXT: fence seq_cst, !mmra [[META3]]
80+
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
81+
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
82+
// CHECK-NEXT: ret void
83+
//
84+
void test_image() {
85+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
86+
87+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent", "local");
88+
89+
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "", "local");
90+
91+
__builtin_amdgcn_fence(4, "agent", "local");
92+
93+
__builtin_amdgcn_fence(3, "workgroup", "local");
94+
}
95+
96+
// CHECK-LABEL: define dso_local void @_Z10test_mixedv(
97+
// CHECK-SAME: ) #[[ATTR0]] {
98+
// CHECK-NEXT: entry:
99+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
100+
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
101+
// CHECK-NEXT: ret void
102+
//
103+
void test_mixed() {
104+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global");
105+
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local");
106+
}
107+
//.
108+
// CHECK: [[META3]] = !{!"amdgpu-as", !"local"}
109+
// CHECK: [[META4]] = !{!"amdgpu-as", !"global"}
110+
// CHECK: [[META5]] = !{[[META4]], [[META3]]}
111+
//.

clang/test/SemaCXX/overload-template.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,13 @@ namespace overloadCheck{
5858
}
5959
}
6060
#endif
61+
62+
namespace GH93076 {
63+
template <typename ...a> int b(a..., int); // expected-note-re 3 {{candidate function template not viable: no known conversion from 'int ()' to 'int' for {{.*}} argument}}
64+
int d() {
65+
(void)b<int, int>(0, 0, d); // expected-error {{no matching function for call to 'b'}}
66+
(void)b<int, int>(0, d, 0); // expected-error {{no matching function for call to 'b'}}
67+
(void)b<int, int>(d, 0, 0); // expected-error {{no matching function for call to 'b'}}
68+
return 0;
69+
}
70+
}

clang/test/SemaOpenCL/builtins-amdgcn-error.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ void test_ds_fmaxf(local float *out, float src, int a) {
155155
void test_fence() {
156156
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
157157
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
158-
__builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
159-
__builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
158+
__builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected at least 2, have 1}}
159+
__builtin_amdgcn_fence(4, 4, 4); // expected-error {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
160160
__builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
161161
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, 5); // expected-error {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
162162
const char ptr[] = "workgroup";

flang/include/flang/Optimizer/Builder/FIRBuilder.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,10 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener {
5050
mlir::SymbolTable *symbolTable = nullptr)
5151
: OpBuilder{op, /*listener=*/this}, kindMap{std::move(kindMap)},
5252
symbolTable{symbolTable} {}
53-
explicit FirOpBuilder(mlir::OpBuilder &builder, fir::KindMapping kindMap)
54-
: OpBuilder(builder), OpBuilder::Listener(), kindMap{std::move(kindMap)} {
53+
explicit FirOpBuilder(mlir::OpBuilder &builder, fir::KindMapping kindMap,
54+
mlir::SymbolTable *symbolTable = nullptr)
55+
: OpBuilder(builder), OpBuilder::Listener(), kindMap{std::move(kindMap)},
56+
symbolTable{symbolTable} {
5557
setListener(this);
5658
}
5759
explicit FirOpBuilder(mlir::OpBuilder &builder, mlir::ModuleOp mod)

flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,12 @@ constexpr TypeBuilderFunc getModel<signed char>() {
130130
};
131131
}
132132
template <>
133+
constexpr TypeBuilderFunc getModel<unsigned char>() {
134+
return [](mlir::MLIRContext *context) -> mlir::Type {
135+
return mlir::IntegerType::get(context, 8 * sizeof(unsigned char));
136+
};
137+
}
138+
template <>
133139
constexpr TypeBuilderFunc getModel<void *>() {
134140
return [](mlir::MLIRContext *context) -> mlir::Type {
135141
return fir::LLVMPointerType::get(context,
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//===-- Support.h - generate support runtime API calls ----------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef FORTRAN_OPTIMIZER_BUILDER_RUNTIME_SUPPORT_H
10+
#define FORTRAN_OPTIMIZER_BUILDER_RUNTIME_SUPPORT_H
11+
12+
namespace mlir {
13+
class Value;
14+
class Location;
15+
} // namespace mlir
16+
17+
namespace fir {
18+
class FirOpBuilder;
19+
}
20+
21+
namespace fir::runtime {
22+
23+
/// Generate call to `CopyAndUpdateDescriptor` runtime routine.
24+
void genCopyAndUpdateDescriptor(fir::FirOpBuilder &builder, mlir::Location loc,
25+
mlir::Value to, mlir::Value from,
26+
mlir::Value newDynamicType,
27+
mlir::Value newAttribute,
28+
mlir::Value newLowerBounds);
29+
30+
} // namespace fir::runtime
31+
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_SUPPORT_H

flang/include/flang/Optimizer/Dialect/FIRAttr.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,15 @@ def fir_BoxFieldAttr : I32EnumAttr<
7070
// mlir::SideEffects::Resource for modelling operations which add debugging information
7171
def DebuggingResource : Resource<"::fir::DebuggingResource">;
7272

73+
def fir_LowerBoundModifierAttribute : I32EnumAttr<
74+
"LowerBoundModifierAttribute",
75+
"Describes how to modify lower bounds",
76+
[
77+
I32EnumAttrCase<"Preserve", 0, "preserve">,
78+
I32EnumAttrCase<"SetToOnes", 1, "ones">,
79+
I32EnumAttrCase<"SetToZeroes", 2, "zeroes">,
80+
]> {
81+
let cppNamespace = "::fir";
82+
}
83+
7384
#endif // FIR_DIALECT_FIR_ATTRS

0 commit comments

Comments
 (0)