|
| 1 | +; LLVM IR for this test is produced from the following SYCL code snippet: |
| 2 | +; |
| 3 | +; #include <sycl/sycl.hpp> |
| 4 | +; |
| 5 | +; struct user_defined_type { |
| 6 | +; float a; |
| 7 | +; int b; |
| 8 | +; char c; |
| 9 | +; |
| 10 | +; constexpr user_defined_type(float a, int b, char c) : a(a), b(b), c(c) {} |
| 11 | +; }; |
| 12 | +; |
| 13 | +; constexpr sycl::specialization_id<user_defined_type> spec_id(3.14, 42, 8); |
| 14 | +; |
| 15 | +; int main() { |
| 16 | +; sycl::queue q; |
| 17 | +; user_defined_type data(0, 0, 0); |
| 18 | +; sycl::buffer buf(&data, sycl::range<1>{1}); |
| 19 | +; q.submit([&](sycl::handler &cgh) { |
| 20 | +; auto acc = buf.get_access(); |
| 21 | +; cgh.single_task([=](sycl::kernel_handler kh) { |
| 22 | +; acc[0] = kh.get_specialization_constant<spec_id>(); |
| 23 | +; }); |
| 24 | +; }); |
| 25 | +; |
| 26 | +; return 0; |
| 27 | +; } |
| 28 | +; |
| 29 | +; Compiled with: clang++ -fsycl -fsycl-device-only -O2 -emit-llvm -S -fno-sycl-instrument-device-code |
| 30 | +; |
| 31 | +; 'user_defined_type' is taken from SYCL-CTS for spec constants. |
| 32 | +; |
| 33 | +; The idea of the test is to ensure that SpecConstants pass is able to handle |
| 34 | +; a situation, where spec constant default value contains less elements than |
| 35 | +; spec constant type, due to padding inserted by a compiler. |
| 36 | +; |
| 37 | +; RUN: sycl-post-link --spec-const=rt -S < %s -o %t.files.table |
| 38 | +; RUN: FileCheck %s -input-file=%t.files_0.ll |
| 39 | +; |
| 40 | +; CHECK: %[[#A:]] = call float @_Z20__spirv_SpecConstantif(i32 [[#ID:]], float 0x40091EB860000000) |
| 41 | +; CHECK: %[[#B:]] = call i32 @_Z20__spirv_SpecConstantii(i32 [[#ID+1]], i32 42) |
| 42 | +; CHECK: %[[#C:]] = call i8 @_Z20__spirv_SpecConstantia(i32 2, i8 8) |
| 43 | +; CHECK: call %struct.user_defined_type @_Z29__spirv_SpecConstantCompositefiaA3_a_Rstruct.user_defined_type(float %[[#A]], i32 %[[#B]], i8 %[[#C]], [3 x i8] undef) |
| 44 | +; |
| 45 | +; CHECK: !sycl.specialization-constants = !{![[#SC:]]} |
| 46 | +; CHECK: ![[#SC]] = !{!"uidac684fbd602505be____ZL7spec_id", |
| 47 | +; CHECK-SAME: i32 [[#ID]], i32 0, i32 4 |
| 48 | +; CHECK-SAME: i32 [[#ID+1]], i32 4, i32 4 |
| 49 | +; CHECK-SAME: i32 [[#ID+2]], i32 8, i32 1 |
| 50 | +; CHECK-SAME: i32 -1, i32 9, i32 3 |
| 51 | + |
| 52 | +source_filename = "t.cpp" |
| 53 | +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" |
| 54 | +target triple = "spir64-unknown-unknown" |
| 55 | + |
| 56 | +%struct.user_defined_type = type <{ float, i32, i8, [3 x i8] }> |
| 57 | +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } |
| 58 | +%"class.sycl::_V1::detail::array" = type { [1 x i64] } |
| 59 | + |
| 60 | +$_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlNS0_14kernel_handlerEE_ = comdat any |
| 61 | + |
| 62 | +@__usid_str = private unnamed_addr constant [34 x i8] c"uidac684fbd602505be____ZL7spec_id\00", align 1 |
| 63 | +@_ZL7spec_id = internal addrspace(1) constant { { float, i32, i8 } } { { float, i32, i8 } { float 0x40091EB860000000, i32 42, i8 8 } }, align 4 |
| 64 | + |
| 65 | +; Function Attrs: convergent norecurse nounwind |
| 66 | +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlNS0_14kernel_handlerEE_(%struct.user_defined_type addrspace(1)* noundef align 4 %_arg_acc, %"class.sycl::_V1::id"* noundef byval(%"class.sycl::_V1::id") align 8 %_arg_acc3) local_unnamed_addr #0 comdat !srcloc !48 !kernel_arg_buffer_location !49 !kernel_arg_runtime_aligned !50 !kernel_arg_exclusive_ptr !50 !sycl_fixed_targets !51 !sycl_kernel_omit_args !52 { |
| 67 | +entry: |
| 68 | + %ref.tmp.i = alloca %struct.user_defined_type, align 4 |
| 69 | + %0 = bitcast %"class.sycl::_V1::id"* %_arg_acc3 to i64* |
| 70 | + %1 = load i64, i64* %0, align 8 |
| 71 | + %add.ptr.i = getelementptr inbounds %struct.user_defined_type, %struct.user_defined_type addrspace(1)* %_arg_acc, i64 %1 |
| 72 | + %ref.tmp.ascast.i = addrspacecast %struct.user_defined_type* %ref.tmp.i to %struct.user_defined_type addrspace(4)* |
| 73 | + %2 = bitcast %struct.user_defined_type* %ref.tmp.i to i8* |
| 74 | + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %2) #4 |
| 75 | + call spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI17user_defined_typeET_PKcPKvS5_(%struct.user_defined_type addrspace(4)* sret(%struct.user_defined_type) align 4 %ref.tmp.ascast.i, i8 addrspace(4)* noundef addrspacecast (i8* getelementptr inbounds ([34 x i8], [34 x i8]* @__usid_str, i64 0, i64 0) to i8 addrspace(4)*), i8 addrspace(4)* noundef addrspacecast (i8 addrspace(1)* bitcast ({ { float, i32, i8 } } addrspace(1)* @_ZL7spec_id to i8 addrspace(1)*) to i8 addrspace(4)*), i8 addrspace(4)* noundef null) #5 |
| 76 | + %3 = bitcast %struct.user_defined_type addrspace(1)* %add.ptr.i to i8 addrspace(1)* |
| 77 | + %4 = bitcast %struct.user_defined_type* %ref.tmp.i to i8* |
| 78 | + call void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* align 4 %3, i8* align 4 %4, i64 9, i1 false), !tbaa.struct !53 |
| 79 | + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %2) #4 |
| 80 | + ret void |
| 81 | +} |
| 82 | + |
| 83 | +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) |
| 84 | +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 |
| 85 | + |
| 86 | +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) |
| 87 | +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 |
| 88 | + |
| 89 | +; Function Attrs: convergent nounwind |
| 90 | +declare dso_local spir_func void @_Z40__sycl_getComposite2020SpecConstantValueI17user_defined_typeET_PKcPKvS5_(%struct.user_defined_type addrspace(4)* sret(%struct.user_defined_type) align 4, i8 addrspace(4)* noundef, i8 addrspace(4)* noundef, i8 addrspace(4)* noundef) local_unnamed_addr #2 |
| 91 | + |
| 92 | +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) |
| 93 | +declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3 |
| 94 | + |
| 95 | +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)*, ...) |
| 96 | + |
| 97 | +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="t.cpp" "sycl-optlevel"="2" "sycl-single-task" "uniform-work-group-size"="true" } |
| 98 | +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } |
| 99 | +attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } |
| 100 | +attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } |
| 101 | +attributes #4 = { nounwind } |
| 102 | +attributes #5 = { convergent nounwind } |
| 103 | + |
| 104 | +!llvm.module.flags = !{!0, !1} |
| 105 | +!opencl.spir.version = !{!2} |
| 106 | +!spirv.Source = !{!3} |
| 107 | +!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46} |
| 108 | +!llvm.ident = !{!47} |
| 109 | + |
| 110 | +!0 = !{i32 1, !"wchar_size", i32 4} |
| 111 | +!1 = !{i32 7, !"frame-pointer", i32 2} |
| 112 | +!2 = !{i32 1, i32 2} |
| 113 | +!3 = !{i32 4, i32 100000} |
| 114 | +!4 = !{!"cpu", i32 1} |
| 115 | +!5 = !{!"gpu", i32 2} |
| 116 | +!6 = !{!"accelerator", i32 3} |
| 117 | +!7 = !{!"custom", i32 4} |
| 118 | +!8 = !{!"fp16", i32 5} |
| 119 | +!9 = !{!"fp64", i32 6} |
| 120 | +!10 = !{!"image", i32 9} |
| 121 | +!11 = !{!"online_compiler", i32 10} |
| 122 | +!12 = !{!"online_linker", i32 11} |
| 123 | +!13 = !{!"queue_profiling", i32 12} |
| 124 | +!14 = !{!"usm_device_allocations", i32 13} |
| 125 | +!15 = !{!"usm_host_allocations", i32 14} |
| 126 | +!16 = !{!"usm_shared_allocations", i32 15} |
| 127 | +!17 = !{!"usm_system_allocations", i32 17} |
| 128 | +!18 = !{!"ext_intel_pci_address", i32 18} |
| 129 | +!19 = !{!"ext_intel_gpu_eu_count", i32 19} |
| 130 | +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} |
| 131 | +!21 = !{!"ext_intel_gpu_slices", i32 21} |
| 132 | +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} |
| 133 | +!23 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23} |
| 134 | +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} |
| 135 | +!25 = !{!"ext_intel_mem_channel", i32 25} |
| 136 | +!26 = !{!"usm_atomic_host_allocations", i32 26} |
| 137 | +!27 = !{!"usm_atomic_shared_allocations", i32 27} |
| 138 | +!28 = !{!"atomic64", i32 28} |
| 139 | +!29 = !{!"ext_intel_device_info_uuid", i32 29} |
| 140 | +!30 = !{!"ext_oneapi_srgb", i32 30} |
| 141 | +!31 = !{!"ext_oneapi_native_assert", i32 31} |
| 142 | +!32 = !{!"host_debuggable", i32 32} |
| 143 | +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} |
| 144 | +!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} |
| 145 | +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} |
| 146 | +!36 = !{!"ext_intel_free_memory", i32 36} |
| 147 | +!37 = !{!"ext_intel_device_id", i32 37} |
| 148 | +!38 = !{!"ext_intel_memory_clock_rate", i32 38} |
| 149 | +!39 = !{!"ext_intel_memory_bus_width", i32 39} |
| 150 | +!40 = !{!"emulated", i32 40} |
| 151 | +!41 = !{!"ext_intel_legacy_image", i32 41} |
| 152 | +!42 = !{!"int64_base_atomics", i32 7} |
| 153 | +!43 = !{!"int64_extended_atomics", i32 8} |
| 154 | +!44 = !{!"usm_system_allocator", i32 17} |
| 155 | +!45 = !{!"usm_restricted_shared_allocations", i32 16} |
| 156 | +!46 = !{!"host", i32 0} |
| 157 | +!47 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git)"} |
| 158 | +!48 = !{i32 443} |
| 159 | +!49 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} |
| 160 | +!50 = !{i1 true, i1 false, i1 false, i1 false, i1 false} |
| 161 | +!51 = !{} |
| 162 | +!52 = !{i1 false, i1 true, i1 true, i1 false, i1 true} |
| 163 | +!53 = !{i64 0, i64 4, !54, i64 4, i64 4, !58, i64 8, i64 1, !60} |
| 164 | +!54 = !{!55, !55, i64 0} |
| 165 | +!55 = !{!"float", !56, i64 0} |
| 166 | +!56 = !{!"omnipotent char", !57, i64 0} |
| 167 | +!57 = !{!"Simple C++ TBAA"} |
| 168 | +!58 = !{!59, !59, i64 0} |
| 169 | +!59 = !{!"int", !56, i64 0} |
| 170 | +!60 = !{!56, !56, i64 0} |
0 commit comments