Skip to content

Commit b013540

Browse files
authored
[Clang][LLVM] Port ZCFS from staging (llvm#1617)
2 parents 89d5b06 + 824fcf6 commit b013540

20 files changed

+1048
-4
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4536,6 +4536,120 @@ If no address spaces names are provided, all address spaces are fenced.
45364536
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local")
45374537
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global")
45384538
4539+
__builtin_amdgcn_processor_is and __builtin_amdgcn_is_invocable
4540+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
4541+
4542+
``__builtin_amdgcn_processor_is`` and ``__builtin_amdgcn_is_invocable`` provide
4543+
a functional mechanism for programmatically querying:
4544+
4545+
* the identity of the current target processor;
4546+
* the capability of the current target processor to invoke a particular builtin.
4547+
4548+
**Syntax**:
4549+
4550+
.. code-block:: c
4551+
4552+
// When used as the predicate for a control structure
4553+
bool __builtin_amdgcn_processor_is(const char*);
4554+
bool __builtin_amdgcn_is_invocable(builtin_name);
4555+
// Otherwise
4556+
void __builtin_amdgcn_processor_is(const char*);
4557+
void __builtin_amdgcn_is_invocable(void);
4558+
4559+
**Example of use**:
4560+
4561+
.. code-block:: c++
4562+
4563+
if (__builtin_amdgcn_processor_is("gfx1201") ||
4564+
__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var))
4565+
__builtin_amdgcn_s_sleep_var(x);
4566+
4567+
if (!__builtin_amdgcn_processor_is("gfx906"))
4568+
__builtin_amdgcn_s_wait_event_export_ready();
4569+
else if (__builtin_amdgcn_processor_is("gfx1010") ||
4570+
__builtin_amdgcn_processor_is("gfx1101"))
4571+
__builtin_amdgcn_s_ttracedata_imm(1);
4572+
4573+
while (__builtin_amdgcn_processor_is("gfx1101")) *p += x;
4574+
4575+
do {
4576+
*p -= x;
4577+
} while (__builtin_amdgcn_processor_is("gfx1010"));
4578+
4579+
for (; __builtin_amdgcn_processor_is("gfx1201"); ++*p) break;
4580+
4581+
if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready))
4582+
__builtin_amdgcn_s_wait_event_export_ready();
4583+
else if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_ttracedata_imm))
4584+
__builtin_amdgcn_s_ttracedata_imm(1);
4585+
4586+
do {
4587+
*p -= x;
4588+
} while (
4589+
__builtin_amdgcn_is_invocable(__builtin_amdgcn_global_load_tr_b64_i32));
4590+
4591+
for (; __builtin_amdgcn_is_invocable(__builtin_amdgcn_permlane64); ++*p)
4592+
break;
4593+
4594+
**Description**:
4595+
4596+
When used as the predicate value of the following control structures:
4597+
4598+
.. code-block:: c++
4599+
4600+
if (...)
4601+
while (...)
4602+
do { } while (...)
4603+
for (...)
4604+
4605+
be it directly, or as arguments to logical operators such as ``!, ||, &&``, the
4606+
builtins return a boolean value that:
4607+
4608+
* for ``__builtin_amdgcn_processor_is``, indicates whether the current target
4609+
matches the argument; the argument MUST be a string literal and a valid AMDGPU target
4610+
* for ``__builtin_amdgcn_is_invocable``, indicates whether the builtin function
4611+
passed as the argument can be invoked by the current target; the argument MUST
4612+
be either a generic or AMDGPU-specific builtin name
4613+
4614+
Outside of these contexts, the builtins have a ``void`` returning signature
4615+
which prevents their misuse.
4616+
4617+
**Example of invalid use**:
4618+
4619+
.. code-block:: c++
4620+
4621+
void kernel(int* p, int x, bool (*pfn)(bool), const char* str) {
4622+
if (__builtin_amdgcn_processor_is("not_an_amdgcn_gfx_id")) return;
4623+
else if (__builtin_amdgcn_processor_is(str)) __builtin_trap();
4624+
4625+
bool a = __builtin_amdgcn_processor_is("gfx906");
4626+
const bool b = !__builtin_amdgcn_processor_is("gfx906");
4627+
const bool c = !__builtin_amdgcn_processor_is("gfx906");
4628+
bool d = __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4629+
bool e = !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4630+
const auto f =
4631+
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
4632+
|| __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4633+
const auto g =
4634+
!__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_wait_event_export_ready)
4635+
|| !__builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var);
4636+
__builtin_amdgcn_processor_is("gfx1201")
4637+
? __builtin_amdgcn_s_sleep_var(x) : __builtin_amdgcn_s_sleep(42);
4638+
if (pfn(__builtin_amdgcn_processor_is("gfx1200")))
4639+
__builtin_amdgcn_s_sleep_var(x);
4640+
4641+
if (__builtin_amdgcn_is_invocable("__builtin_amdgcn_s_sleep_var")) return;
4642+
else if (__builtin_amdgcn_is_invocable(x)) __builtin_trap();
4643+
}
4644+
4645+
When invoked while compiling for a concrete target, the builtins are evaluated
4646+
early by Clang, and never produce any CodeGen effects / have no observable
4647+
side-effects in IR. Conversely, when compiling for AMDGCN flavoured SPIR-V,
4648+
which is an abstract target, a series of predicate values are implicitly
4649+
created. These predicates get resolved when finalizing the compilation process
4650+
for a concrete target, and shall reflect the latter's identity and features.
4651+
Thus, it is possible to author high-level code, in e.g. HIP, that is target
4652+
adaptive in a dynamic fashion, contrary to macro based mechanisms.
45394653
45404654
ARM/AArch64 Language Extensions
45414655
-------------------------------

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1161,6 +1161,10 @@ AMDGPU Support
11611161
CUDA, HIP, OpenCL, and C/C++.
11621162

11631163
- Bump the default code object version to 6.
1164+
- Introduced a new target specific builtin ``__builtin_amdgcn_processor_is``,
1165+
a late / deferred query for the current target processor.
1166+
- Introduced a new target specific builtin ``__builtin_amdgcn_is_invocable``,
1167+
which enables fine-grained, per-builtin, feature availability checks.
11641168

11651169
NVPTX Support
11661170
^^^^^^^^^^^^^^

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,11 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
349349
BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
350350
BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
351351

352+
// These are special front-end (FE) only builtins intended for forwarding the
353+
// requirements to the middle-end (ME).
354+
BUILTIN(__builtin_amdgcn_processor_is, "vcC*", "nctu")
355+
BUILTIN(__builtin_amdgcn_is_invocable, "v", "nctu")
356+
352357
//===----------------------------------------------------------------------===//
353358
// R600-NI only builtins.
354359
//===----------------------------------------------------------------------===//

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12524,4 +12524,14 @@ def err_acc_loop_spec_conflict
1252412524
// AMDGCN builtins diagnostics
1252512525
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
1252612526
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
12527+
def err_amdgcn_processor_is_arg_not_literal
12528+
: Error<"the argument to __builtin_amdgcn_processor_is must be a string "
12529+
"literal">;
12530+
def err_amdgcn_processor_is_arg_invalid_value
12531+
: Error<"the argument to __builtin_amdgcn_processor_is must be a valid "
12532+
"AMDGCN processor identifier; '%0' is not valid">;
12533+
def err_amdgcn_is_invocable_arg_invalid_value
12534+
: Error<"the argument to __builtin_amdgcn_is_invocable must be either a "
12535+
"target agnostic builtin or an AMDGCN target specific builtin; `%0`"
12536+
" is not valid">;
1252712537
} // end of sema component.

clang/lib/Basic/Targets/SPIR.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,7 @@ void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) {
129129
Float128Format = DoubleFormat;
130130
}
131131
}
132+
133+
bool SPIRV64AMDGCNTargetInfo::isValidCPUName(StringRef CPU) const {
134+
return AMDGPUTI.isValidCPUName(CPU);
135+
}

clang/lib/Basic/Targets/SPIR.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
430430
}
431431

432432
bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
433+
434+
// This is only needed for validating arguments passed to
435+
// __builtin_amdgcn_processor_is
436+
bool isValidCPUName(StringRef Name) const override;
433437
};
434438

435439
} // namespace targets

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16496,7 +16496,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1649616496
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
1649716497
}
1649816498
case X86::BI__builtin_ia32_cvtsbf162ss_32:
16499-
return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16499+
return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
1650016500

1650116501
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
1650216502
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
@@ -18542,6 +18542,18 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
1854218542
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
1854318543
}
1854418544

18545+
static Value *GetOrInsertAMDGPUPredicate(CodeGenFunction &CGF, Twine Name) {
18546+
auto PTy = IntegerType::getInt1Ty(CGF.getLLVMContext());
18547+
18548+
auto *P = cast<GlobalVariable>(
18549+
CGF.CGM.getModule().getOrInsertGlobal(Name.str(), PTy));
18550+
P->setConstant(true);
18551+
P->setExternallyInitialized(true);
18552+
18553+
return CGF.Builder.CreateLoad(
18554+
RawAddress(P, PTy, CharUnits::One(), KnownNonNull));
18555+
}
18556+
1854518557
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1854618558
const CallExpr *E) {
1854718559
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -18894,6 +18906,23 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1889418906
llvm::Value *Env = EmitScalarExpr(E->getArg(0));
1889518907
return Builder.CreateCall(F, {Env});
1889618908
}
18909+
case AMDGPU::BI__builtin_amdgcn_processor_is: {
18910+
assert(CGM.getTriple().isSPIRV() &&
18911+
"__builtin_amdgcn_processor_is should never reach CodeGen for "
18912+
"concrete targets!");
18913+
StringRef Proc = cast<clang::StringLiteral>(E->getArg(0))->getString();
18914+
return GetOrInsertAMDGPUPredicate(*this, "llvm.amdgcn.is." + Proc);
18915+
}
18916+
case AMDGPU::BI__builtin_amdgcn_is_invocable: {
18917+
assert(CGM.getTriple().isSPIRV() &&
18918+
"__builtin_amdgcn_is_invocable should never reach CodeGen for "
18919+
"concrete targets!");
18920+
auto *FD = cast<FunctionDecl>(
18921+
cast<DeclRefExpr>(E->getArg(0))->getReferencedDeclOfCallee());
18922+
StringRef RF =
18923+
getContext().BuiltinInfo.getRequiredFeatures(FD->getBuiltinID());
18924+
return GetOrInsertAMDGPUPredicate(*this, "llvm.amdgcn.has." + RF);
18925+
}
1889718926
case AMDGPU::BI__builtin_amdgcn_read_exec:
1889818927
return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
1889918928
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:

0 commit comments

Comments
 (0)