|
| 1 | +; LLVM IR generated by Intel SYCL Clang compiler (https://github.com/intel/llvm) |
| 2 | +; SYCL source code can be found below: |
| 3 | + |
| 4 | +; #include <CL/sycl.hpp> |
| 5 | +; #include <CL/sycl/intel/fpga_extensions.hpp> |
| 6 | +; |
| 7 | +; int main() { |
| 8 | +; cl::sycl::queue Queue{cl::sycl::intel::fpga_emulator_selector{}}; |
| 9 | +; |
| 10 | +; { |
| 11 | +; cl::sycl::buffer<int, 1> output_buffer(output_data, 1); |
| 12 | +; cl::sycl::buffer<int, 1> input_buffer(input_data, 1); |
| 13 | +; |
| 14 | +; Queue.submit([&](cl::sycl::handler &cgh) { |
| 15 | +; auto output_accessor = |
| 16 | +; output_buffer.get_access<cl::sycl::access::mode::write>(cgh); |
| 17 | +; auto input_accessor = |
| 18 | +; input_buffer.get_access<cl::sycl::access::mode::read>(cgh); |
| 19 | +; |
| 20 | +; cgh.single_task<class kernel>([=] { |
| 21 | +; auto input_ptr = input_accessor.get_pointer(); |
| 22 | +; auto output_ptr = output_accessor.get_pointer(); |
| 23 | +; |
| 24 | +; using PrefetchingLSU = |
| 25 | +; cl::sycl::intel::lsu<cl::sycl::intel::prefetch<true>, |
| 26 | +; cl::sycl::intel::statically_coalesce<false>>; |
| 27 | +; |
| 28 | +; using BurstCoalescedLSU = |
| 29 | +; cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>, |
| 30 | +; cl::sycl::intel::statically_coalesce<false>>; |
| 31 | +; |
| 32 | +; using CachingLSU = |
| 33 | +; cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>, |
| 34 | +; cl::sycl::intel::cache<1024>, |
| 35 | +; cl::sycl::intel::statically_coalesce<false>>; |
| 36 | +; |
| 37 | +; using PipelinedLSU = cl::sycl::intel::lsu<>; |
| 38 | +; |
| 39 | +; int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0] |
| 40 | +; int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1] |
| 41 | +; |
| 42 | +; BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X |
| 43 | +; PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y |
| 44 | +; }); |
| 45 | +; }); |
| 46 | +; } |
| 47 | +; |
| 48 | +; return 0; |
| 49 | +; } |
| 50 | + |
| 51 | +; Check that translation of optimized IR doesn't crash: |
| 52 | +; RUN: llvm-as %s -o %t.bc |
| 53 | +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_fpga_memory_accesses -o %t.spv |
| 54 | + |
| 55 | +; Check that reverse translation restore ptr.annotations correctly: |
| 56 | +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc |
| 57 | +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM |
| 58 | + |
| 59 | +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" |
| 60 | +target triple = "spir64-unknown-unknown-sycldevice" |
| 61 | + |
| 62 | +%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } |
| 63 | +%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } |
| 64 | + |
| 65 | +$"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel" = comdat any |
| 66 | + |
| 67 | +@.str = private unnamed_addr constant [26 x i8] c"{params:12}{cache-size:0}\00", section "llvm.metadata" |
| 68 | +@.str.1 = private unnamed_addr constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata" |
| 69 | +@.str.2 = private unnamed_addr constant [28 x i8] c"{params:7}{cache-size:1024}\00", section "llvm.metadata" |
| 70 | +@.str.3 = private unnamed_addr constant [25 x i8] c"{params:5}{cache-size:0}\00", section "llvm.metadata" |
| 71 | +@.str.4 = private unnamed_addr constant [25 x i8] c"{params:0}{cache-size:0}\00", section "llvm.metadata" |
| 72 | + |
| 73 | +; CHECK-LLVM: [[PTR_i27_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:12} |
| 74 | +; CHECK-LLVM: [[PTR_i15_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:7}{cache-size:1024} |
| 75 | +; CHECK-LLVM: [[PTR_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:5} |
| 76 | +; CHECK-LLVM: [[PTR_i_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:0}{cache-size:0} |
| 77 | + |
| 78 | +; Function Attrs: norecurse |
| 79 | +define weak_odr dso_local spir_kernel void @"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel"(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_buffer_location !8 { |
| 80 | +entry: |
| 81 | + %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0 |
| 82 | + %1 = load i64, i64* %0, align 8 |
| 83 | + %add.ptr.i27 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %1 |
| 84 | + %2 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0 |
| 85 | + %3 = load i64, i64* %2, align 8 |
| 86 | + %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %3 |
| 87 | + %4 = addrspacecast i32 addrspace(1)* %add.ptr.i27 to i32 addrspace(4)* |
| 88 | + %5 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %4, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0) #2 |
| 89 | + %6 = load i32, i32 addrspace(4)* %5, align 4, !tbaa !9 |
| 90 | + ; CHECK-LLVM: [[PTR_i27:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}} |
| 91 | + ; CHECK-LLVM: [[PTR_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}} |
| 92 | + ; CHECK-LLVM: [[PTR_i27_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i27]] to i32 addrspace(4)* |
| 93 | + ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i27_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i27_ANNOT_STR]] |
| 94 | + ; TODO: add check that load is called for result of ptr.annotation when corresponding bug is fixed |
| 95 | + %add.ptr.i15.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i27, i64 1 |
| 96 | + %7 = addrspacecast i32 addrspace(1)* %add.ptr.i15.i to i32 addrspace(4)* |
| 97 | + %8 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %7, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0) #2 |
| 98 | + %9 = load i32, i32 addrspace(4)* %8, align 4, !tbaa !9 |
| 99 | + ; CHECK-LLVM: [[PTR_i15_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}} |
| 100 | + ; CHECK-LLVM: [[PTR_i15_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i15_i]] to i32 addrspace(4)* |
| 101 | + ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i15_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i15_i_ANNOT_STR]] |
| 102 | + ; TODO: add check that load is called for result of ptr.annotation when corresponding bug is fixed |
| 103 | + %10 = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)* |
| 104 | + %11 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %10, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0) #2 |
| 105 | + store i32 %6, i32 addrspace(4)* %11, align 4, !tbaa !9 |
| 106 | + ; CHECK-LLVM: [[PTR_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i]] to i32 addrspace(4)* |
| 107 | + ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i_ANNOT_STR]] |
| 108 | + ; TODO: add check that store is called for result of ptr.annotation when corresponding bug is fixed |
| 109 | + %add.ptr.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 1 |
| 110 | + %12 = addrspacecast i32 addrspace(1)* %add.ptr.i.i to i32 addrspace(4)* |
| 111 | + %13 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %12, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.4, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0) #2 |
| 112 | + store i32 %9, i32 addrspace(4)* %13, align 4, !tbaa !9 |
| 113 | + ; CHECK-LLVM: [[PTR_i_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}} |
| 114 | + ; CHECK-LLVM: [[PTR_i_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i_i]] to i32 addrspace(4)* |
| 115 | + ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i_i_ANNOT_STR]] |
| 116 | + ; TODO: add check that store is called for result of ptr.annotation when corresponding bug is fixed |
| 117 | + ret void |
| 118 | +} |
| 119 | + |
| 120 | +; Function Attrs: nounwind willreturn |
| 121 | +declare i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)*, i8*, i8*, i32) #1 |
| 122 | + |
| 123 | +attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="fpga_lsu.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } |
| 124 | +attributes #1 = { nounwind willreturn } |
| 125 | +attributes #2 = { nounwind readnone } |
| 126 | + |
| 127 | +!opencl.spir.version = !{!0} |
| 128 | +!spirv.Source = !{!1} |
| 129 | +!llvm.ident = !{!2} |
| 130 | +!llvm.module.flags = !{!3} |
| 131 | + |
| 132 | +!0 = !{i32 1, i32 2} |
| 133 | +!1 = !{i32 4, i32 100000} |
| 134 | +!2 = !{!"clang version 12.0.0"} |
| 135 | +!3 = !{i32 1, !"wchar_size", i32 4} |
| 136 | +!4 = !{i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0} |
| 137 | +!5 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"} |
| 138 | +!6 = !{!"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>", !"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>"} |
| 139 | +!7 = !{!"", !"", !"", !"", !"", !"", !"", !""} |
| 140 | +!8 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} |
| 141 | +!9 = !{!10, !10, i64 0} |
| 142 | +!10 = !{!"int", !11, i64 0} |
| 143 | +!11 = !{!"omnipotent char", !12, i64 0} |
| 144 | +!12 = !{!"Simple C++ TBAA"} |
0 commit comments