Skip to content

Commit 5dc1d7d

Browse files
mlychkovvladimirlaz
authored andcommitted
Fix ptr.annotation translation for pointers on non-struct types
Signed-off-by: Mikhail Lychkov <[email protected]>
1 parent 2800b4b commit 5dc1d7d

File tree

2 files changed

+149
-3
lines changed

2 files changed

+149
-3
lines changed

llvm-spirv/lib/SPIRV/SPIRVWriter.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2168,9 +2168,11 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II,
21682168
// actions for each individual extension.
21692169
Decorations = tryParseIntelFPGAAnnotationString(AnnotationString);
21702170

2171-
// If the pointer is a GEP, then we have to emit a member decoration for the
2172-
// GEP-accessed struct, or a memory access decoration for the GEP itself.
2173-
if (auto *GI = dyn_cast<GetElementPtrInst>(AnnotSubj)) {
2171+
// If the pointer is a GEP on a struct, then we have to emit a member
2172+
// decoration for the GEP-accessed struct, or a memory access decoration
2173+
// for the GEP itself.
2174+
auto *GI = dyn_cast<GetElementPtrInst>(AnnotSubj);
2175+
if (GI && isa<StructType>(GI->getSourceElementType())) {
21742176
auto *Ty = transType(GI->getSourceElementType());
21752177
auto *ResPtr = transValue(GI, BB);
21762178
SPIRVWord MemberNumber =
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
; LLVM IR generated by Intel SYCL Clang compiler (https://github.com/intel/llvm)
2+
; SYCL source code can be found below:
3+
4+
; #include <CL/sycl.hpp>
5+
; #include <CL/sycl/intel/fpga_extensions.hpp>
6+
;
7+
; int main() {
8+
; cl::sycl::queue Queue{cl::sycl::intel::fpga_emulator_selector{}};
9+
;
10+
; {
11+
; cl::sycl::buffer<int, 1> output_buffer(output_data, 1);
12+
; cl::sycl::buffer<int, 1> input_buffer(input_data, 1);
13+
;
14+
; Queue.submit([&](cl::sycl::handler &cgh) {
15+
; auto output_accessor =
16+
; output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
17+
; auto input_accessor =
18+
; input_buffer.get_access<cl::sycl::access::mode::read>(cgh);
19+
;
20+
; cgh.single_task<class kernel>([=] {
21+
; auto input_ptr = input_accessor.get_pointer();
22+
; auto output_ptr = output_accessor.get_pointer();
23+
;
24+
; using PrefetchingLSU =
25+
; cl::sycl::intel::lsu<cl::sycl::intel::prefetch<true>,
26+
; cl::sycl::intel::statically_coalesce<false>>;
27+
;
28+
; using BurstCoalescedLSU =
29+
; cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
30+
; cl::sycl::intel::statically_coalesce<false>>;
31+
;
32+
; using CachingLSU =
33+
; cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
34+
; cl::sycl::intel::cache<1024>,
35+
; cl::sycl::intel::statically_coalesce<false>>;
36+
;
37+
; using PipelinedLSU = cl::sycl::intel::lsu<>;
38+
;
39+
; int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
40+
; int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
41+
;
42+
; BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
43+
; PipelinedLSU::store(output_ptr + 1, Y); // output_ptr[1] = Y
44+
; });
45+
; });
46+
; }
47+
;
48+
; return 0;
49+
; }
50+
51+
; Check that translation of optimized IR doesn't crash:
52+
; RUN: llvm-as %s -o %t.bc
53+
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_fpga_memory_accesses -o %t.spv
54+
55+
; Check that reverse translation restore ptr.annotations correctly:
56+
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
57+
; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
58+
59+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
60+
target triple = "spir64-unknown-unknown-sycldevice"
61+
62+
%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
63+
%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
64+
65+
$"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel" = comdat any
66+
67+
@.str = private unnamed_addr constant [26 x i8] c"{params:12}{cache-size:0}\00", section "llvm.metadata"
68+
@.str.1 = private unnamed_addr constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata"
69+
@.str.2 = private unnamed_addr constant [28 x i8] c"{params:7}{cache-size:1024}\00", section "llvm.metadata"
70+
@.str.3 = private unnamed_addr constant [25 x i8] c"{params:5}{cache-size:0}\00", section "llvm.metadata"
71+
@.str.4 = private unnamed_addr constant [25 x i8] c"{params:0}{cache-size:0}\00", section "llvm.metadata"
72+
73+
; CHECK-LLVM: [[PTR_i27_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:12}
74+
; CHECK-LLVM: [[PTR_i15_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:7}{cache-size:1024}
75+
; CHECK-LLVM: [[PTR_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:5}
76+
; CHECK-LLVM: [[PTR_i_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:0}{cache-size:0}
77+
78+
; Function Attrs: norecurse
79+
define weak_odr dso_local spir_kernel void @"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel"(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_buffer_location !8 {
80+
entry:
81+
%0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0
82+
%1 = load i64, i64* %0, align 8
83+
%add.ptr.i27 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %1
84+
%2 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0
85+
%3 = load i64, i64* %2, align 8
86+
%add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %3
87+
%4 = addrspacecast i32 addrspace(1)* %add.ptr.i27 to i32 addrspace(4)*
88+
%5 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %4, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0) #2
89+
%6 = load i32, i32 addrspace(4)* %5, align 4, !tbaa !9
90+
; CHECK-LLVM: [[PTR_i27:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
91+
; CHECK-LLVM: [[PTR_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
92+
; CHECK-LLVM: [[PTR_i27_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i27]] to i32 addrspace(4)*
93+
; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i27_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i27_ANNOT_STR]]
94+
; TODO: add check that load is called for result of ptr.annotation when corresponding bug is fixed
95+
%add.ptr.i15.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i27, i64 1
96+
%7 = addrspacecast i32 addrspace(1)* %add.ptr.i15.i to i32 addrspace(4)*
97+
%8 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %7, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0) #2
98+
%9 = load i32, i32 addrspace(4)* %8, align 4, !tbaa !9
99+
; CHECK-LLVM: [[PTR_i15_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
100+
; CHECK-LLVM: [[PTR_i15_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i15_i]] to i32 addrspace(4)*
101+
; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i15_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i15_i_ANNOT_STR]]
102+
; TODO: add check that load is called for result of ptr.annotation when corresponding bug is fixed
103+
%10 = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)*
104+
%11 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %10, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0) #2
105+
store i32 %6, i32 addrspace(4)* %11, align 4, !tbaa !9
106+
; CHECK-LLVM: [[PTR_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i]] to i32 addrspace(4)*
107+
; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i_ANNOT_STR]]
108+
; TODO: add check that store is called for result of ptr.annotation when corresponding bug is fixed
109+
%add.ptr.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 1
110+
%12 = addrspacecast i32 addrspace(1)* %add.ptr.i.i to i32 addrspace(4)*
111+
%13 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %12, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.4, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0) #2
112+
store i32 %9, i32 addrspace(4)* %13, align 4, !tbaa !9
113+
; CHECK-LLVM: [[PTR_i_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
114+
; CHECK-LLVM: [[PTR_i_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i_i]] to i32 addrspace(4)*
115+
; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i_i_ANNOT_STR]]
116+
; TODO: add check that store is called for result of ptr.annotation when corresponding bug is fixed
117+
ret void
118+
}
119+
120+
; Function Attrs: nounwind willreturn
121+
declare i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)*, i8*, i8*, i32) #1
122+
123+
attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="fpga_lsu.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
124+
attributes #1 = { nounwind willreturn }
125+
attributes #2 = { nounwind readnone }
126+
127+
!opencl.spir.version = !{!0}
128+
!spirv.Source = !{!1}
129+
!llvm.ident = !{!2}
130+
!llvm.module.flags = !{!3}
131+
132+
!0 = !{i32 1, i32 2}
133+
!1 = !{i32 4, i32 100000}
134+
!2 = !{!"clang version 12.0.0"}
135+
!3 = !{i32 1, !"wchar_size", i32 4}
136+
!4 = !{i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0}
137+
!5 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
138+
!6 = !{!"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>", !"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>"}
139+
!7 = !{!"", !"", !"", !"", !"", !"", !"", !""}
140+
!8 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
141+
!9 = !{!10, !10, i64 0}
142+
!10 = !{!"int", !11, i64 0}
143+
!11 = !{!"omnipotent char", !12, i64 0}
144+
!12 = !{!"Simple C++ TBAA"}

0 commit comments

Comments
 (0)