Skip to content

Commit a050277

Browse files
svenvhsys-ce-bb
authored and committed
Add cl_khr_kernel_clock support (#2582)
Add support for mapping the `cl_khr_kernel_clock` extension builtins to and from the `SPV_KHR_shader_clock` extension. Original commit: KhronosGroup/SPIRV-LLVM-Translator@abf48906dae1ed4
1 parent 7719244 commit a050277

File tree

6 files changed

+110
-0
lines changed

6 files changed

+110
-0
lines changed

llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,10 @@ void OCLToSPIRVBase::visitCallInst(CallInst &CI) {
340340
visitCallDot(&CI, MangledName, DemangledName);
341341
return;
342342
}
343+
if (DemangledName.starts_with(kOCLBuiltinName::ClockReadPrefix)) {
344+
visitCallClockRead(&CI, MangledName, DemangledName);
345+
return;
346+
}
343347
if (DemangledName == kOCLBuiltinName::FMin ||
344348
DemangledName == kOCLBuiltinName::FMax ||
345349
DemangledName == kOCLBuiltinName::Min ||
@@ -1324,6 +1328,23 @@ void OCLToSPIRVBase::visitCallDot(CallInst *CI, StringRef MangledName,
13241328
}
13251329
}
13261330

1331+
// Rewrite a call to an OpenCL clock_read_* builtin into a call to the
// SPIR-V friendly OpReadClockKHR function, appending the scope as an i32
// argument.
//
// CI            - call whose demangled callee name starts with "clock_read_".
// MangledName   - mangled callee name (not used by this transformation).
// DemangledName - demangled callee name; its suffix selects the scope.
//
// Calls whose name does not end in a known scope suffix are left untouched.
void OCLToSPIRVBase::visitCallClockRead(CallInst *CI, StringRef MangledName,
                                        StringRef DemangledName) {
  // Scope is part of the OpenCL builtin name.
  Scope ScopeArg = StringSwitch<Scope>(DemangledName)
                       .EndsWith("device", ScopeDevice)
                       .EndsWith("work_group", ScopeWorkgroup)
                       .EndsWith("sub_group", ScopeSubgroup)
                       .Default(ScopeMax);

  // ScopeMax is only the "no match" sentinel of the switch above. Emitting it
  // as the scope operand would produce an invalid OpReadClockKHR, so a callee
  // that merely shares the "clock_read_" prefix is not translated.
  if (ScopeArg == ScopeMax)
    return;

  // The builtin returns i64 or <2 x i32>, but both variants are mapped to the
  // same instruction; hence include the return type.
  std::string OpName = getSPIRVFuncName(OpReadClockKHR, CI->getType());

  auto Mutator = mutateCallInst(CI, OpName);
  Mutator.appendArg(getInt32(M, ScopeArg));
}
1347+
13271348
void OCLToSPIRVBase::visitCallScalToVec(CallInst *CI, StringRef MangledName,
13281349
StringRef DemangledName) {
13291350
// Check if all arguments have the same type - it's simple case.

llvm-spirv/lib/SPIRV/OCLToSPIRV.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,10 @@ class OCLToSPIRVBase : public InstVisitor<OCLToSPIRVBase>, BuiltinCallHelper {
217217
void visitCallDot(CallInst *CI, StringRef MangledName,
218218
StringRef DemangledName);
219219

220+
/// Transform clock_read_* calls to OpReadClockKHR instructions.
221+
void visitCallClockRead(CallInst *CI, StringRef MangledName,
222+
StringRef DemangledName);
223+
220224
/// Fixes for built-in functions with vector+scalar arguments that are
221225
/// translated to the SPIR-V instructions where all arguments must have the
222226
/// same type.

llvm-spirv/lib/SPIRV/OCLUtil.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ const static char AtomicInit[] = "atomic_init";
237237
const static char AtomicWorkItemFence[] = "atomic_work_item_fence";
238238
const static char Barrier[] = "barrier";
239239
const static char Clamp[] = "clamp";
240+
const static char ClockReadPrefix[] = "clock_read_";
240241
const static char ConvertPrefix[] = "convert_";
241242
const static char Dot[] = "dot";
242243
const static char DotAccSat[] = "dot_acc_sat";

llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ void SPIRVToOCLBase::visitCallInst(CallInst &CI) {
210210
visitCallSPIRVRelational(&CI, OC);
211211
return;
212212
}
213+
if (OC == OpReadClockKHR) {
214+
visitCallSPIRVReadClockKHR(&CI);
215+
return;
216+
}
213217
if (OC == internal::OpConvertFToBF16INTEL ||
214218
OC == internal::OpConvertBF16ToFINTEL) {
215219
visitCallSPIRVBFloat16Conversions(&CI, OC);
@@ -1021,6 +1025,33 @@ void SPIRVToOCLBase::visitCallSPIRVRelational(CallInst *CI, Op OC) {
10211025
});
10221026
}
10231027

1028+
// Rewrite a call to the SPIR-V friendly OpReadClockKHR function into the
// matching OpenCL builtin: clock_read_<scope> for scalar results and
// clock_read_hilo_<scope> for vector results. The scope is taken from the
// first call argument, which is dropped from the rewritten call.
//
// CI - call to the OpReadClockKHR function.
//
// The call is left untouched when the scope operand is not a constant or
// names a scope other than device, work-group or sub-group, since no OpenCL
// builtin name can be formed in those cases.
void SPIRVToOCLBase::visitCallSPIRVReadClockKHR(CallInst *CI) {
  // The scope must be known at translation time to pick the builtin name.
  auto *ScopeOp = dyn_cast<ConstantInt>(CI->getArgOperand(0));
  if (!ScopeOp)
    return;

  // Encode the scope (taken from the argument) in the function name.
  const char *ScopeName = nullptr;
  switch (static_cast<Scope>(ScopeOp->getZExtValue())) {
  case ScopeDevice:
    ScopeName = "device";
    break;
  case ScopeWorkgroup:
    ScopeName = "work_group";
    break;
  case ScopeSubgroup:
    ScopeName = "sub_group";
    break;
  default:
    // Unsupported scope: avoid emitting a truncated builtin name.
    return;
  }

  std::string Name = "clock_read_";
  if (CI->getType()->isVectorTy())
    Name += "hilo_";
  Name += ScopeName;

  auto Mutator = mutateCallInst(CI, Name);
  Mutator.removeArg(0);
}
1054+
10241055
std::string SPIRVToOCLBase::getGroupBuiltinPrefix(CallInst *CI) {
10251056
std::string Prefix;
10261057
auto ES = getArgAsScope(CI, 0);

llvm-spirv/lib/SPIRV/SPIRVToOCL.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,9 @@ class SPIRVToOCLBase : public InstVisitor<SPIRVToOCLBase>,
241241
/// Transform relational builtin, e.g. __spirv_IsNan, to OpenCL builtin.
242242
void visitCallSPIRVRelational(CallInst *CI, Op OC);
243243

244+
/// Transform __spirv_ReadClockKHR to OpenCL builtin.
245+
void visitCallSPIRVReadClockKHR(CallInst *CI);
246+
244247
/// Conduct generic mutations for all atomic builtins
245248
virtual CallInst *mutateCommonAtomicArguments(CallInst *CI, Op OC) = 0;
246249

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// REQUIRES: spirv-dis
2+
// RUN: %clang_cc1 -triple spir-unknown-unknown -O1 -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -emit-llvm-bc %s -o %t.bc
3+
// RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_shader_clock -o %t.spv
4+
// RUN: spirv-dis %t.spv -o - | FileCheck %s --check-prefix=CHECK-SPIRV
5+
// TODO: spirv-val %t.spv
6+
// RUN: llvm-spirv -r %t.spv -o %t.rev.bc
7+
// RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
8+
// RUN: llvm-spirv -r --spirv-target-env=SPV-IR %t.spv -o %t.rev.bc
9+
// RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
10+
11+
// CHECK-SPIRV: OpCapability ShaderClockKHR
12+
// CHECK-SPIRV: OpExtension "SPV_KHR_shader_clock"
13+
// CHECK-SPIRV-DAG: [[uint:%[a-z0-9_]+]] = OpTypeInt 32
14+
// CHECK-SPIRV-DAG: [[ulong:%[a-z0-9_]+]] = OpTypeInt 64
15+
// CHECK-SPIRV-DAG: [[v2uint:%[a-z0-9_]+]] = OpTypeVector [[uint]] 2
16+
// CHECK-SPIRV-DAG: [[uint_1:%[a-z0-9_]+]] = OpConstant [[uint]] 1
17+
// CHECK-SPIRV-DAG: [[uint_2:%[a-z0-9_]+]] = OpConstant [[uint]] 2
18+
// CHECK-SPIRV-DAG: [[uint_3:%[a-z0-9_]+]] = OpConstant [[uint]] 3
19+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_1]]
20+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_2]]
21+
// CHECK-SPIRV: OpReadClockKHR [[ulong]] [[uint_3]]
22+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_1]]
23+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_2]]
24+
// CHECK-SPIRV: OpReadClockKHR [[v2uint]] [[uint_3]]
25+
26+
// CHECK-LLVM-LABEL: test_clocks
27+
// CHECK-LLVM: call spir_func i64 @_Z17clock_read_devicev()
28+
// CHECK-LLVM: call spir_func i64 @_Z21clock_read_work_groupv()
29+
// CHECK-LLVM: call spir_func i64 @_Z20clock_read_sub_groupv()
30+
// CHECK-LLVM: call spir_func <2 x i32> @_Z22clock_read_hilo_devicev()
31+
// CHECK-LLVM: call spir_func <2 x i32> @_Z26clock_read_hilo_work_groupv()
32+
// CHECK-LLVM: call spir_func <2 x i32> @_Z25clock_read_hilo_sub_groupv()
33+
34+
// CHECK-SPV-IR-LABEL: test_clocks
35+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 1)
36+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 2)
37+
// CHECK-SPV-IR: call spir_func i64 @_Z27__spirv_ReadClockKHR_Rulongi(i32 3)
38+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 1)
39+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 2)
40+
// CHECK-SPV-IR: call spir_func <2 x i32> @_Z27__spirv_ReadClockKHR_Ruint2i(i32 3)
41+
42+
// Test kernel: calls each of the six clock_read_* builtins once and stores
// the results. The FileCheck patterns in this file match the calls in this
// order, so the statement order must not change.
kernel void test_clocks(global ulong *out64, global uint2 *outv2) {
43+
// Variants stored as ulong: device, work-group and sub-group scope.
out64[0] = clock_read_device();
44+
out64[1] = clock_read_work_group();
45+
out64[2] = clock_read_sub_group();
46+
47+
// "hilo" variants stored as uint2, in the same scope order.
outv2[0] = clock_read_hilo_device();
48+
outv2[1] = clock_read_hilo_work_group();
49+
outv2[2] = clock_read_hilo_sub_group();
50+
}

0 commit comments

Comments
 (0)