Skip to content

Commit 2e7077d

Browse files
YuriPlyakhinigcbot
authored andcommitted
Resolve type mismatch when calling ushort OCL built-ins for bfloat16 types
Resolve "Calling a function with a bad signature!" assertion in debug builds caused by passing %"class.sycl::_V1::ext::oneapi::bfloat16" type parameter to built-ins, which accept i16 in SubGroupFuncsResolution pass.
1 parent 8061422 commit 2e7077d

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/SubGroupFuncs/SubGroupFuncsResolution.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,22 @@ void SubGroupFuncsResolution::mediaBlockWrite(llvm::CallInst& CI)
333333
CI.eraseFromParent();
334334
}
335335

336+
// If CI parameter is %"class.sycl::_V1::ext::oneapi::bfloat16" type, which is { i16 },
337+
// then we need to cast it to i16 type before calling simdBlockRead intrinsic.
338+
static inline Value* castSYCLBFloat16toi16(PointerType* PtrTy, Value* Ptr, CallInst& CI, LLVMContext& C)
339+
{
340+
if (StructType* ST = dyn_cast<StructType>(IGCLLVM::getNonOpaquePtrEltTy(PtrTy)))
341+
{
342+
// check if ST has only field and this field is i16 type
343+
if (ST->getNumElements() == 1 && ST->getElementType(0)->isIntegerTy(16))
344+
{
345+
return CastInst::CreatePointerCast(Ptr, PointerType::get(Type::getInt16Ty(C), PtrTy->getAddressSpace()), "", &CI);
346+
}
347+
}
348+
349+
return Ptr;
350+
}
351+
336352
void SubGroupFuncsResolution::simdBlockRead(llvm::CallInst& CI, bool hasCacheControls)
337353
{
338354
// Creates intrinsics that will be lowered in the CodeGen and will handle the simd_block_read
@@ -341,7 +357,7 @@ void SubGroupFuncsResolution::simdBlockRead(llvm::CallInst& CI, bool hasCacheCon
341357
PointerType* PtrTy = dyn_cast<PointerType>(Ptr->getType());
342358
IGC_ASSERT_MESSAGE(PtrTy, "simdBlockRead has non-pointer type!");
343359
SmallVector<Value*, 1> args;
344-
args.push_back(Ptr);
360+
args.push_back(castSYCLBFloat16toi16(PtrTy, Ptr, CI, C));
345361
SmallVector<Type*, 3> types;
346362
types.push_back(nullptr); types.push_back(nullptr);
347363
GenISAIntrinsic::ID genIntrinID = GenISAIntrinsic::GenISA_simdBlockRead;
@@ -447,7 +463,7 @@ void SubGroupFuncsResolution::simdBlockWrite(llvm::CallInst& CI, bool hasCacheCo
447463
SmallVector<Type*, 2> types;
448464
Value* dataArg = CI.getArgOperand(1);
449465

450-
args.push_back(CI.getArgOperand(0));
466+
args.push_back(castSYCLBFloat16toi16(PtrTy, Ptr, CI, C));
451467
args.push_back(dataArg);
452468

453469
switch (dataArg->getType()->getScalarType()->getScalarSizeInBits())
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: igc_opt --platformdg2 --igc-sub-group-func-resolution -S %s 2>&1 | FileCheck %s
10+
; ------------------------------------------------
11+
; SubGroupFuncsResolution
12+
; ------------------------------------------------
13+
; This test checks that SubGroupFuncsResolution pass resolves mismatch
14+
; between bfloat16 type passed from SYCL and built-ins accepting i16 type
15+
; ------------------------------------------------
16+
17+
%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 }
18+
19+
define spir_kernel void @test_bfloat16(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* %dst, %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* %src) #0 {
20+
; CHECK-LABEL: @test_bfloat16(
21+
; CHECK-NEXT: entry:
22+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)*
23+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.genx.GenISA.simdBlockRead.v2i16.p1i16(i16 addrspace(1)* [[TMP0]])
24+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* [[DST:%.*]] to i16 addrspace(3)*
25+
; CHECK-NEXT: call void @llvm.genx.GenISA.simdBlockWrite.p3i16.v2i16(i16 addrspace(3)* [[TMP2]], <2 x i16> [[TMP1]])
26+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* [[SRC]] to i16 addrspace(1)*
27+
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i16> @llvm.genx.GenISA.simdBlockRead.v16i16.p1i16(i16 addrspace(1)* [[TMP3]])
28+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* [[DST]] to i16 addrspace(3)*
29+
; CHECK-NEXT: call void @llvm.genx.GenISA.simdBlockWrite.p3i16.v16i16(i16 addrspace(3)* [[TMP5]], <16 x i16> [[TMP4]])
30+
; CHECK-NEXT: ret void
31+
;
32+
entry:
33+
%0 = call spir_func <2 x i16> @__builtin_IB_simd_block_read_2_global_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* %src) #0
34+
call spir_func void @__builtin_IB_simd_block_write_2_local_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* %dst, <2 x i16> %0) #0
35+
%1 = call spir_func <16 x i16> @__builtin_IB_simd_block_read_16_global_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)* %src) #0
36+
call spir_func void @__builtin_IB_simd_block_write_16_local_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)* %dst, <16 x i16> %1) #0
37+
ret void
38+
}
39+
40+
declare spir_func <2 x i16> @__builtin_IB_simd_block_read_2_global_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)*) #0
41+
declare spir_func void @__builtin_IB_simd_block_write_2_local_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)*, <2 x i16>) #0
42+
declare spir_func <16 x i16> @__builtin_IB_simd_block_read_16_global_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)*) #0
43+
declare spir_func void @__builtin_IB_simd_block_write_16_local_h(%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)*, <16 x i16>) #0
44+
45+
attributes #0 = { convergent noinline nounwind optnone }
46+
47+
!igc.functions = !{!3}
48+
49+
!3 = !{void (%"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(3)*, %"class.sycl::_V1::ext::oneapi::bfloat16" addrspace(1)*)* @test_bfloat16, !4}
50+
!4 = !{!5, !6}
51+
!5 = !{!"function_type", i32 0}
52+
!6 = !{!"sub_group_size", i32 8}

0 commit comments

Comments
 (0)