Skip to content

Commit e481174

Browse files
[sycl-post-link] Add support for composite specialization constants (#2779)
Design document can be found in sycl/doc/SpecializationConstants.md.
1 parent 5e5703f commit e481174

File tree

10 files changed

+1151
-96
lines changed

10 files changed

+1151
-96
lines changed

llvm/include/llvm/Support/PropertySetIO.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ class PropertyValue {
8080

8181
PropertyValue(uint32_t Val) : Ty(UINT32), Val({Val}) {}
8282
PropertyValue(const byte *Data, SizeTy DataBitSize);
83+
template <typename T>
84+
PropertyValue(const std::vector<T> &Data)
85+
: PropertyValue(reinterpret_cast<const byte *>(Data.data()),
86+
Data.size() * sizeof(T) * /* bits in one byte */ 8) {}
8387
PropertyValue(const PropertyValue &P);
8488
PropertyValue(PropertyValue &&P);
8589

@@ -179,6 +183,8 @@ class PropertySetRegistry {
179183
// Specific property category names used by tools.
180184
static constexpr char SYCL_SPECIALIZATION_CONSTANTS[] =
181185
"SYCL/specialization constants";
186+
static constexpr char SYCL_COMPOSITE_SPECIALIZATION_CONSTANTS[] =
187+
"SYCL/composite specialization constants";
182188
static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask";
183189
static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt";
184190

llvm/lib/Support/PropertySetIO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ void PropertyValue::copy(const PropertyValue &P) {
197197
constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[];
198198
constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[];
199199
constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[];
200+
constexpr char PropertySetRegistry::SYCL_COMPOSITE_SPECIALIZATION_CONSTANTS[];
200201

201202
} // namespace util
202203
} // namespace llvm

llvm/test/tools/sycl-post-link/composite-spec-constant-O0.ll

Lines changed: 469 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
; RUN: sycl-post-link -spec-const=default --ir-output-only %s -S -o - \
2+
; RUN: | FileCheck %s --implicit-check-not __sycl_getCompositeSpecConstantValue
3+
;
4+
; This test checks that composite specialization constants can be correctly
5+
; initialized by sycl-post-link tool for AOT use-case (default initialization
6+
; should be used according to the type of constant)
7+
;
8+
; TODO: consider adding a test case with vector type: the pass itself already
9+
; supports this, but at the moment, sycl::vec type is not a POD type, which
10+
; means we can't have it within a spec constant, i.e. we can't generate LLVM IR
11+
; from a real-life application to use it as a test here.
12+
;
13+
; CHECK: %[[#CAST:]] = addrspacecast %struct._ZTS3POD.POD* %ref.tmp.i
14+
; CHECK: store %struct._ZTS3POD.POD zeroinitializer, %struct._ZTS3POD.POD {{.*}}* %[[#CAST]]
15+
16+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
17+
target triple = "spir64-unknown-unknown-sycldevice"
18+
19+
%struct._ZTS3POD.POD = type { [2 x %struct._ZTS1A.A], %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" }
20+
%struct._ZTS1A.A = type { i32, float }
21+
%"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" = type { <2 x i32> }
22+
%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
23+
%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
24+
%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
25+
26+
$_ZTS4Test = comdat any
27+
28+
@__builtin_unique_stable_name._ZNK2cl4sycl6ONEAPI12experimental13spec_constantI3PODS4_E3getIS4_EENSt9enable_ifIXsr3std6is_podIT_EE5valueES8_E4typeEv = private unnamed_addr addrspace(1) constant [9 x i8] c"_ZTS3POD\00", align 1
29+
30+
; Function Attrs: convergent norecurse uwtable
31+
define weak_odr dso_local spir_kernel void @_ZTS4Test(%struct._ZTS3POD.POD addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
32+
entry:
33+
%ref.tmp.i = alloca %struct._ZTS3POD.POD, align 8
34+
%0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
35+
%1 = load i64, i64* %0, align 8
36+
%add.ptr.i = getelementptr inbounds %struct._ZTS3POD.POD, %struct._ZTS3POD.POD addrspace(1)* %_arg_, i64 %1
37+
%2 = bitcast %struct._ZTS3POD.POD* %ref.tmp.i to i8*
38+
call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %2) #3
39+
%3 = addrspacecast %struct._ZTS3POD.POD* %ref.tmp.i to %struct._ZTS3POD.POD addrspace(4)*
40+
call spir_func void @_Z36__sycl_getCompositeSpecConstantValueI3PODET_PKc(%struct._ZTS3POD.POD addrspace(4)* sret align 8 %3, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(1)* @__builtin_unique_stable_name._ZNK2cl4sycl6ONEAPI12experimental13spec_constantI3PODS4_E3getIS4_EENSt9enable_ifIXsr3std6is_podIT_EE5valueES8_E4typeEv, i64 0, i64 0) to i8 addrspace(4)*)) #4
41+
%4 = bitcast %struct._ZTS3POD.POD addrspace(1)* %add.ptr.i to i8 addrspace(1)*
42+
%5 = addrspacecast i8 addrspace(1)* %4 to i8 addrspace(4)*
43+
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 8 dereferenceable(24) %5, i8* nonnull align 8 dereferenceable(24) %2, i64 24, i1 false), !tbaa.struct !5
44+
call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %2) #3
45+
ret void
46+
}
47+
48+
; Function Attrs: argmemonly nounwind willreturn
49+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
50+
51+
; Function Attrs: argmemonly nounwind willreturn
52+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
53+
54+
; Function Attrs: argmemonly nounwind willreturn
55+
declare void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1
56+
57+
; Function Attrs: convergent
58+
declare dso_local spir_func void @_Z36__sycl_getCompositeSpecConstantValueI3PODET_PKc(%struct._ZTS3POD.POD addrspace(4)* sret align 8, i8 addrspace(4)*) local_unnamed_addr #2
59+
60+
attributes #0 = { convergent norecurse uwtable "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../sycl/test/spec_const/composite.cpp" "tune-cpu"="generic" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
61+
attributes #1 = { argmemonly nounwind willreturn }
62+
attributes #2 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
63+
attributes #3 = { nounwind }
64+
attributes #4 = { convergent }
65+
66+
!llvm.module.flags = !{!0}
67+
!opencl.spir.version = !{!1}
68+
!spirv.Source = !{!2}
69+
!llvm.ident = !{!3}
70+
71+
!0 = !{i32 1, !"wchar_size", i32 4}
72+
!1 = !{i32 1, i32 2}
73+
!2 = !{i32 4, i32 100000}
74+
!3 = !{!"clang version 12.0.0 (/data/github.com/intel/llvm/clang 56ee5b054b5a1f2f703722fc414fcb05af18b40a)"}
75+
!4 = !{i32 -1, i32 -1, i32 -1, i32 -1}
76+
!5 = !{i64 0, i64 16, !6, i64 16, i64 8, !6}
77+
!6 = !{!7, !7, i64 0}
78+
!7 = !{!"omnipotent char", !8, i64 0}
79+
!8 = !{!"Simple C++ TBAA"}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
; RUN: sycl-post-link -spec-const=rt --ir-output-only %s -S -o - \
2+
; RUN: | FileCheck %s --implicit-check-not __sycl_getCompositeSpecConstantValue
3+
;
4+
; This test is intended to check that sycl-post-link tool is capable of handling
5+
; composite specialization constants by lowering them into a set of SPIR-V
6+
; friendly IR operations representing those constants.
7+
;
8+
; CHECK: %[[#NS0:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID:]], i32
9+
; CHECK: %[[#NS1:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#ID + 1]], float
10+
; CHECK: %[[#NA0:]] = call %struct._ZTS1A.A @_Z29__spirv_SpecConstantCompositeif(i32 %[[#NS0]], float %[[#NS1]])
11+
;
12+
; CHECK: %[[#NS2:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID + 2]], i32
13+
; CHECK: %[[#NS3:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#ID + 3]], float
14+
; CHECK: %[[#NA1:]] = call %struct._ZTS1A.A @_Z29__spirv_SpecConstantCompositeif(i32 %[[#NS2]], float %[[#NS3]])
15+
;
16+
; CHECK: %[[#NA:]] = call [2 x %struct._ZTS1A.A] @_Z29__spirv_SpecConstantCompositestruct._ZTS1A.Astruct._ZTS1A.A(%struct._ZTS1A.A %[[#NA0]], %struct._ZTS1A.A %[[#NA1]])
17+
;
18+
; CHECK: %[[#B0:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID + 4]], i32{{.*}})
19+
; CHECK: %[[#B1:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID + 5]], i32{{.*}})
20+
; CHECK: %[[#BV:]] = call <2 x i32> @_Z29__spirv_SpecConstantCompositeii(i32 %[[#B0]], i32 %[[#B1]])
21+
; CHECK: %[[#B:]] = call %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" @_Z29__spirv_SpecConstantCompositeDv2_i(<2 x i32> %[[#BV]])
22+
;
23+
; CHECK: %[[#POD:]] = call %struct._ZTS3POD.POD @"_Z29__spirv_SpecConstantCompositeAstruct._ZTS1A.Aclass._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec"([2 x %struct._ZTS1A.A] %[[#NA]], %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" %[[#B]]), !SYCL_SPEC_CONST_SYM_ID ![[#MD:]]
24+
; CHECK: store %struct._ZTS3POD.POD %[[#POD]]
25+
;
26+
; CHECK: ![[#MD]] = !{!"_ZTS3POD", i32 [[#ID]], i32 [[#ID + 1]], i32 [[#ID + 2]], i32 [[#ID + 3]], i32 [[#ID + 4]], i32 [[#ID + 5]]}
27+
28+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
29+
target triple = "spir64-unknown-unknown-sycldevice"
30+
31+
%struct._ZTS3POD.POD = type { [2 x %struct._ZTS1A.A], %"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" }
32+
%struct._ZTS1A.A = type { i32, float }
33+
%"class._ZTSN2cl4sycl3vecIiLi2EEE.cl::sycl::vec" = type { <2 x i32> }
34+
%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
35+
%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
36+
%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
37+
38+
$_ZTS4Test = comdat any
39+
40+
@__builtin_unique_stable_name._ZNK2cl4sycl6ONEAPI12experimental13spec_constantI3PODS4_E3getIS4_EENSt9enable_ifIXsr3std6is_podIT_EE5valueES8_E4typeEv = private unnamed_addr addrspace(1) constant [9 x i8] c"_ZTS3POD\00", align 1
41+
42+
; Function Attrs: convergent norecurse uwtable
43+
define weak_odr dso_local spir_kernel void @_ZTS4Test(%struct._ZTS3POD.POD addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
44+
entry:
45+
%ref.tmp.i = alloca %struct._ZTS3POD.POD, align 8
46+
%0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
47+
%1 = load i64, i64* %0, align 8
48+
%add.ptr.i = getelementptr inbounds %struct._ZTS3POD.POD, %struct._ZTS3POD.POD addrspace(1)* %_arg_, i64 %1
49+
%2 = bitcast %struct._ZTS3POD.POD* %ref.tmp.i to i8*
50+
call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %2) #3
51+
%3 = addrspacecast %struct._ZTS3POD.POD* %ref.tmp.i to %struct._ZTS3POD.POD addrspace(4)*
52+
call spir_func void @_Z36__sycl_getCompositeSpecConstantValueI3PODET_PKc(%struct._ZTS3POD.POD addrspace(4)* sret align 8 %3, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(1)* @__builtin_unique_stable_name._ZNK2cl4sycl6ONEAPI12experimental13spec_constantI3PODS4_E3getIS4_EENSt9enable_ifIXsr3std6is_podIT_EE5valueES8_E4typeEv, i64 0, i64 0) to i8 addrspace(4)*)) #4
53+
%4 = bitcast %struct._ZTS3POD.POD addrspace(1)* %add.ptr.i to i8 addrspace(1)*
54+
%5 = addrspacecast i8 addrspace(1)* %4 to i8 addrspace(4)*
55+
call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 8 dereferenceable(24) %5, i8* nonnull align 8 dereferenceable(24) %2, i64 24, i1 false), !tbaa.struct !5
56+
call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %2) #3
57+
ret void
58+
}
59+
60+
; Function Attrs: argmemonly nounwind willreturn
61+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
62+
63+
; Function Attrs: argmemonly nounwind willreturn
64+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
65+
66+
; Function Attrs: argmemonly nounwind willreturn
67+
declare void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1
68+
69+
; Function Attrs: convergent
70+
declare dso_local spir_func void @_Z36__sycl_getCompositeSpecConstantValueI3PODET_PKc(%struct._ZTS3POD.POD addrspace(4)* sret align 8, i8 addrspace(4)*) local_unnamed_addr #2
71+
72+
attributes #0 = { convergent norecurse uwtable "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../sycl/test/spec_const/composite.cpp" "tune-cpu"="generic" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
73+
attributes #1 = { argmemonly nounwind willreturn }
74+
attributes #2 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
75+
attributes #3 = { nounwind }
76+
attributes #4 = { convergent }
77+
78+
!llvm.module.flags = !{!0}
79+
!opencl.spir.version = !{!1}
80+
!spirv.Source = !{!2}
81+
!llvm.ident = !{!3}
82+
83+
!0 = !{i32 1, !"wchar_size", i32 4}
84+
!1 = !{i32 1, i32 2}
85+
!2 = !{i32 4, i32 100000}
86+
!3 = !{!"clang version 12.0.0 "}
87+
!4 = !{i32 -1, i32 -1, i32 -1, i32 -1}
88+
!5 = !{i64 0, i64 16, !6, i64 16, i64 8, !6}
89+
!6 = !{!7, !7, i64 0}
90+
!7 = !{!"omnipotent char", !8, i64 0}
91+
!8 = !{!"Simple C++ TBAA"}

0 commit comments

Comments
 (0)