Skip to content

Commit 833a9fe

Browse files
[SYCL] Fix handling of structs with trailing padding in SpecConstants pass (#10211)
This patch fixes a regression accidentally introduced in #9874 (presumably because we didn't have the right test). SYCL-CTS produces an LLVM IR pattern, where spec constant initializer contains less elements than there are in a spec constant type, because some of those elements are implicitly-created paddings. The patch updates the pass to handle this situation. Note: this PR only covers a case when padding is inserted at the end of a struct. Resolves #10129
1 parent 93f4773 commit 833a9fe

File tree

3 files changed

+226
-13
lines changed

3 files changed

+226
-13
lines changed
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
; LLVM IR for this test is produced from the following SYCL code snippet:
2+
;
3+
; #include <sycl/sycl.hpp>
4+
;
5+
; struct user_defined_type {
6+
; float a;
7+
; int b;
8+
; char c;
9+
;
10+
; constexpr user_defined_type(float a, int b, char c) : a(a), b(b), c(c) {}
11+
; };
12+
;
13+
; constexpr sycl::specialization_id<user_defined_type> spec_id(3.14, 42, 8);
14+
;
15+
; int main() {
16+
; sycl::queue q;
17+
; user_defined_type data(0, 0, 0);
18+
; sycl::buffer buf(&data, sycl::range<1>{1});
19+
; q.submit([&](sycl::handler &cgh) {
20+
; auto acc = buf.get_access();
21+
; cgh.single_task([=](sycl::kernel_handler kh) {
22+
; acc[0] = kh.get_specialization_constant<spec_id>();
23+
; });
24+
; });
25+
;
26+
; return 0;
27+
; }
28+
;
29+
; Compiled with: clang++ -fsycl -fsycl-device-only -O2 -emit-llvm -S -fno-sycl-instrument-device-code
30+
;
31+
; 'user_defined_type' is taken from SYCL-CTS for spec constants.
32+
;
33+
; The idea of the test is to ensure that SpecConstants pass is able to handle
34+
; a situation, where spec constant default value contains less elements than
35+
; spec constant type, due to padding inserted by a compiler.
36+
;
37+
; RUN: sycl-post-link --spec-const=rt -S < %s -o %t.files.table
38+
; RUN: FileCheck %s -input-file=%t.files_0.ll
39+
;
40+
; CHECK: %[[#A:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#ID:]], float 0x40091EB860000000)
41+
; CHECK: %[[#B:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID+1]], i32 42)
42+
; CHECK: %[[#C:]] = call i8 @_Z20__spirv_SpecConstantia(i32 2, i8 8)
43+
; CHECK: call %struct.user_defined_type @_Z29__spirv_SpecConstantCompositefiaA3_a_Rstruct.user_defined_type(float %[[#A]], i32 %[[#B]], i8 %[[#C]], [3 x i8] undef)
44+
;
45+
; CHECK: !sycl.specialization-constants = !{![[#SC:]]}
46+
; CHECK: ![[#SC]] = !{!"uidac684fbd602505be____ZL7spec_id",
47+
; CHECK-SAME: i32 [[#ID]], i32 0, i32 4
48+
; CHECK-SAME: i32 [[#ID+1]], i32 4, i32 4
49+
; CHECK-SAME: i32 [[#ID+2]], i32 8, i32 1
50+
; CHECK-SAME: i32 -1, i32 9, i32 3
51+
52+
source_filename = "t.cpp"
53+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
54+
target triple = "spir64-unknown-unknown"
55+
56+
%struct.user_defined_type = type <{ float, i32, i8, [3 x i8] }>
57+
%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" }
58+
%"class.sycl::_V1::detail::array" = type { [1 x i64] }
59+
60+
$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlNS0_14kernel_handlerEE_ = comdat any
61+
62+
@__usid_str = private unnamed_addr constant [34 x i8] c"uidac684fbd602505be____ZL7spec_id\00", align 1
63+
@_ZL7spec_id = internal addrspace(1) constant { { float, i32, i8 } } { { float, i32, i8 } { float 0x40091EB860000000, i32 42, i8 8 } }, align 4
64+
65+
; Function Attrs: convergent norecurse nounwind
66+
define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlNS0_14kernel_handlerEE_(%struct.user_defined_type addrspace(1)* noundef align 4 %_arg_acc, %"class.sycl::_V1::id"* noundef byval(%"class.sycl::_V1::id") align 8 %_arg_acc3) local_unnamed_addr #0 comdat !srcloc !48 !kernel_arg_buffer_location !49 !kernel_arg_runtime_aligned !50 !kernel_arg_exclusive_ptr !50 !sycl_fixed_targets !51 !sycl_kernel_omit_args !52 {
67+
entry:
68+
%ref.tmp.i = alloca %struct.user_defined_type, align 4
69+
%0 = bitcast %"class.sycl::_V1::id"* %_arg_acc3 to i64*
70+
%1 = load i64, i64* %0, align 8
71+
%add.ptr.i = getelementptr inbounds %struct.user_defined_type, %struct.user_defined_type addrspace(1)* %_arg_acc, i64 %1
72+
%ref.tmp.ascast.i = addrspacecast %struct.user_defined_type* %ref.tmp.i to %struct.user_defined_type addrspace(4)*
73+
%2 = bitcast %struct.user_defined_type* %ref.tmp.i to i8*
74+
call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %2) #4
75+
call spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI17user_defined_typeET_PKcPKvS5_(%struct.user_defined_type addrspace(4)* sret(%struct.user_defined_type) align 4 %ref.tmp.ascast.i, i8 addrspace(4)* noundef addrspacecast (i8* getelementptr inbounds ([34 x i8], [34 x i8]* @__usid_str, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* noundef addrspacecast (i8 addrspace(1)* bitcast ({ { float, i32, i8 } } addrspace(1)* @_ZL7spec_id to i8 addrspace(1)*) to i8 addrspace(4)*), i8 addrspace(4)* noundef null) #5
76+
%3 = bitcast %struct.user_defined_type addrspace(1)* %add.ptr.i to i8 addrspace(1)*
77+
%4 = bitcast %struct.user_defined_type* %ref.tmp.i to i8*
78+
call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* align 4 %3, i8* align 4 %4, i64 9, i1 false), !tbaa.struct !53
79+
call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %2) #4
80+
ret void
81+
}
82+
83+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
84+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
85+
86+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
87+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
88+
89+
; Function Attrs: convergent nounwind
90+
declare dso_local spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI17user_defined_typeET_PKcPKvS5_(%struct.user_defined_type addrspace(4)* sret(%struct.user_defined_type) align 4, i8 addrspace(4)* noundef, i8 addrspace(4)* noundef, i8 addrspace(4)* noundef) local_unnamed_addr #2
91+
92+
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
93+
declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3
94+
95+
declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)*, ...)
96+
97+
attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="t.cpp" "sycl-optlevel"="2" "sycl-single-task" "uniform-work-group-size"="true" }
98+
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
99+
attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
100+
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
101+
attributes #4 = { nounwind }
102+
attributes #5 = { convergent nounwind }
103+
104+
!llvm.module.flags = !{!0, !1}
105+
!opencl.spir.version = !{!2}
106+
!spirv.Source = !{!3}
107+
!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46}
108+
!llvm.ident = !{!47}
109+
110+
!0 = !{i32 1, !"wchar_size", i32 4}
111+
!1 = !{i32 7, !"frame-pointer", i32 2}
112+
!2 = !{i32 1, i32 2}
113+
!3 = !{i32 4, i32 100000}
114+
!4 = !{!"cpu", i32 1}
115+
!5 = !{!"gpu", i32 2}
116+
!6 = !{!"accelerator", i32 3}
117+
!7 = !{!"custom", i32 4}
118+
!8 = !{!"fp16", i32 5}
119+
!9 = !{!"fp64", i32 6}
120+
!10 = !{!"image", i32 9}
121+
!11 = !{!"online_compiler", i32 10}
122+
!12 = !{!"online_linker", i32 11}
123+
!13 = !{!"queue_profiling", i32 12}
124+
!14 = !{!"usm_device_allocations", i32 13}
125+
!15 = !{!"usm_host_allocations", i32 14}
126+
!16 = !{!"usm_shared_allocations", i32 15}
127+
!17 = !{!"usm_system_allocations", i32 17}
128+
!18 = !{!"ext_intel_pci_address", i32 18}
129+
!19 = !{!"ext_intel_gpu_eu_count", i32 19}
130+
!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20}
131+
!21 = !{!"ext_intel_gpu_slices", i32 21}
132+
!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22}
133+
!23 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23}
134+
!24 = !{!"ext_intel_max_mem_bandwidth", i32 24}
135+
!25 = !{!"ext_intel_mem_channel", i32 25}
136+
!26 = !{!"usm_atomic_host_allocations", i32 26}
137+
!27 = !{!"usm_atomic_shared_allocations", i32 27}
138+
!28 = !{!"atomic64", i32 28}
139+
!29 = !{!"ext_intel_device_info_uuid", i32 29}
140+
!30 = !{!"ext_oneapi_srgb", i32 30}
141+
!31 = !{!"ext_oneapi_native_assert", i32 31}
142+
!32 = !{!"host_debuggable", i32 32}
143+
!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33}
144+
!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34}
145+
!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35}
146+
!36 = !{!"ext_intel_free_memory", i32 36}
147+
!37 = !{!"ext_intel_device_id", i32 37}
148+
!38 = !{!"ext_intel_memory_clock_rate", i32 38}
149+
!39 = !{!"ext_intel_memory_bus_width", i32 39}
150+
!40 = !{!"emulated", i32 40}
151+
!41 = !{!"ext_intel_legacy_image", i32 41}
152+
!42 = !{!"int64_base_atomics", i32 7}
153+
!43 = !{!"int64_extended_atomics", i32 8}
154+
!44 = !{!"usm_system_allocator", i32 17}
155+
!45 = !{!"usm_restricted_shared_allocations", i32 16}
156+
!46 = !{!"host", i32 0}
157+
!47 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git)"}
158+
!48 = !{i32 443}
159+
!49 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
160+
!50 = !{i1 true, i1 false, i1 false, i1 false, i1 false}
161+
!51 = !{}
162+
!52 = !{i1 false, i1 true, i1 true, i1 false, i1 true}
163+
!53 = !{i64 0, i64 4, !54, i64 4, i64 4, !58, i64 8, i64 1, !60}
164+
!54 = !{!55, !55, i64 0}
165+
!55 = !{!"float", !56, i64 0}
166+
!56 = !{!"omnipotent char", !57, i64 0}
167+
!57 = !{!"Simple C++ TBAA"}
168+
!58 = !{!59, !59, i64 0}
169+
!59 = !{!"int", !56, i64 0}
170+
!60 = !{!56, !56, i64 0}

llvm/tools/sycl-post-link/SpecConstants.cpp

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ Instruction *emitSpecConstantRecursiveImpl(Type *Ty, Instruction *InsertBefore,
545545
if (Index >= IDs.size()) {
546546
// If it is a new specialization constant, we need to generate IDs for
547547
// scalar elements, starting with the second one.
548-
assert(!isa_and_nonnull<UndefValue>(DefaultValue) &&
548+
assert(!isa<UndefValue>(DefaultValue) &&
549549
"All scalar values should be defined");
550550
IDs.push_back({IDs.back().ID + 1, false});
551551
}
@@ -562,22 +562,26 @@ Instruction *emitSpecConstantRecursiveImpl(Type *Ty, Instruction *InsertBefore,
562562
Elements.push_back(Def);
563563
};
564564
auto LoopIteration = [&](Type *Ty, unsigned LocalIndex) {
565-
// Select corresponding element of the default value if it was provided
566-
Constant *Def =
567-
DefaultValue ? DefaultValue->getAggregateElement(LocalIndex) : nullptr;
568-
if (isa_and_nonnull<UndefValue>(Def))
569-
HandleUndef(Def);
570-
else
571-
Elements.push_back(
572-
emitSpecConstantRecursiveImpl(Ty, InsertBefore, IDs, Index, Def));
573-
};
565+
// Select corresponding element of the default value.
566+
// There are cases when provided default value contains less elements than
567+
// specialization constants: it could happen when a struct is extended with
568+
// a padding to make its size aligned. In such cases, we simply initialize
569+
// any "extra" elements with undef.
570+
Constant *ElemDefaultValue = DefaultValue->getAggregateElement(LocalIndex);
571+
if (!ElemDefaultValue)
572+
ElemDefaultValue = UndefValue::get(Ty);
574573

575-
if (isa_and_nonnull<UndefValue>(DefaultValue)) {
576574
// If the default value is a composite and has the value 'undef', we should
577575
// not generate a bunch of __spirv_SpecConstant for its elements but
578576
// pass it into __spirv_SpecConstantComposite as is.
579-
HandleUndef(DefaultValue);
580-
} else if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
577+
if (isa<UndefValue>(ElemDefaultValue))
578+
HandleUndef(ElemDefaultValue);
579+
else
580+
Elements.push_back(emitSpecConstantRecursiveImpl(
581+
Ty, InsertBefore, IDs, Index, ElemDefaultValue));
582+
};
583+
584+
if (auto *ArrTy = dyn_cast<ArrayType>(Ty)) {
581585
for (size_t I = 0; I < ArrTy->getNumElements(); ++I) {
582586
LoopIteration(ArrTy->getElementType(), I);
583587
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
//
4+
#include <sycl/sycl.hpp>
5+
6+
struct user_defined_type {
7+
float a;
8+
int b;
9+
char c;
10+
11+
constexpr user_defined_type(float a, int b, char c) : a(a), b(b), c(c) {}
12+
constexpr user_defined_type(const user_defined_type &) = default;
13+
14+
bool operator==(const user_defined_type &other) const {
15+
return other.a == a && other.b == b && other.c == c;
16+
}
17+
};
18+
19+
constexpr user_defined_type reference(3.14, 42, 8);
20+
constexpr sycl::specialization_id<user_defined_type> spec_id(reference);
21+
22+
int main() {
23+
sycl::queue q;
24+
user_defined_type data(0, 0, 0);
25+
26+
{
27+
sycl::buffer buf(&data, sycl::range<1>{1});
28+
q.submit([&](sycl::handler &cgh) {
29+
auto acc = buf.get_access(cgh);
30+
cgh.single_task([=](sycl::kernel_handler kh) {
31+
acc[0] = kh.get_specialization_constant<spec_id>();
32+
});
33+
}).wait();
34+
}
35+
36+
assert(reference == data);
37+
38+
return 0;
39+
}

0 commit comments

Comments
 (0)