Skip to content

Commit dfaaaed

Browse files
authored
[SYCL][ESIMD] Fix a crash in LowerESIMDVecArg pass (#3164)
Fix a crash that occurred because LowerESIMDVecArg pass was trying to add code to a function declaration. Signed-off-by: pratik ashar [email protected]
1 parent 489df6b commit dfaaaed

File tree

2 files changed

+44
-2
lines changed

2 files changed

+44
-2
lines changed

llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,12 @@ Function *ESIMDLowerVecArgPass::rewriteFunc(Function &F) {
181181

182182
llvm::CloneFunctionInto(NF, &F, VMap, F.getSubprogram() != nullptr, Returns);
183183

184+
// insert bitcasts in new function only if its a definition
184185
for (auto &B : BitCasts) {
185-
NF->begin()->getInstList().push_front(B);
186+
if (!F.isDeclaration())
187+
NF->begin()->getInstList().push_front(B);
188+
else
189+
delete B;
186190
}
187191

188192
NF->takeName(&F);
@@ -281,6 +285,5 @@ bool ESIMDLowerVecArgPass::run(Module &M) {
281285
}
282286
}
283287
}
284-
285288
return true;
286289
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; This test checks whether subroutine arguments are converted
2+
; correctly to llvm's native vector type when callee is an extern function.
3+
;
4+
; RUN: opt < %s -ESIMDLowerVecArg -S | FileCheck %s
5+
6+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
7+
target triple = "spir64-unknown-unknown-sycldevice"
8+
9+
%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" = type { <16 x float> }
10+
11+
$"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any
12+
13+
@_ZL2VL = internal unnamed_addr addrspace(1) constant i32 16, align 4
14+
15+
; Function Attrs: convergent norecurse
16+
define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test"() {
17+
entry:
18+
%0 = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
19+
%agg.tmp5.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
20+
%agg.tmp6.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd", align 64
21+
%1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* %0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)*
22+
23+
; CHECK: [[BITCASTRESULT1:%[a-zA-Z0-9_]*]] = bitcast {{.+}} addrspace(4)* %1 to <16 x float> addrspace(4)*
24+
; CHECK: [[BITCASTRESULT2:%[a-zA-Z0-9_]*]] = bitcast {{.+}} %agg.tmp5.i to <16 x float>*
25+
; CHECK: [[BITCASTRESULT3:%[a-zA-Z0-9_]*]] = bitcast {{.+}} %agg.tmp6.i to <16 x float>*
26+
; CHECK-NEXT: call spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(<16 x float> addrspace(4)* [[BITCASTRESULT1]], <16 x float>* [[BITCASTRESULT2]], <16 x float>* [[BITCASTRESULT3]])
27+
28+
call spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* sret(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %1, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %agg.tmp5.i, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* nonnull byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %agg.tmp6.i) #1
29+
ret void
30+
}
31+
32+
; CHECK: declare dso_local spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(<16 x float> addrspace(4)*, <16 x float>*, <16 x float>*){{.+}}
33+
; Function Attrs: convergent
34+
declare dso_local spir_func void @_Z4vaddN2cl4sycl5INTEL3gpu4simdIfLi16EEES4_(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd" addrspace(4)* sret(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %0, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %1, %"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd"* byval(%"class._ZTSN2cl4sycl5INTEL3gpu4simdIfLi16EEE.cl::sycl::INTEL::gpu::simd") align 64 %2) local_unnamed_addr #2
35+
36+
attributes #0 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
37+
attributes #1 = { convergent }
38+
39+

0 commit comments

Comments
 (0)