Skip to content

[SYCL] Fix type traits used for shuffle operations #17055

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 34 additions & 23 deletions sycl/include/sycl/detail/spirv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -797,29 +797,36 @@ AtomicMax(multi_ptr<T, AddressSpace, IsDecorated> MPtr, memory_scope Scope,
#ifndef __NVPTX__

template <typename T>
struct TypeIsProhibitedForShuffleEmulation
struct VecTypeIsProhibitedForNativeShuffle
: std::bool_constant<
(detail::get_vec_size<T>::size > 1) &&
check_type_in_v<vector_element_t<T>, double, long, long long,
unsigned long, unsigned long long, half>> {};

// Native shuffle is supported if T is scalar arithmetic type or vector type
// with arithmetic element type which is not in prohibited list.
// detail::is_arithmetic looks through pointer, marray and vec into the element
// type, so we have to exclude marray and pointer types.
template <typename T>
struct VecTypeIsProhibitedForShuffleEmulation
: std::bool_constant<
(detail::get_vec_size<T>::size > 1) &&
TypeIsProhibitedForShuffleEmulation<vector_element_t<T>>::value> {};
struct NativeShuffle
: std::bool_constant<detail::is_arithmetic<T>::value &&
!VecTypeIsProhibitedForNativeShuffle<T>::value &&
!detail::is_marray_v<T> && !detail::is_pointer_v<T>> {
};

// Non-scalar shuffle (emulation via loop + scalar shuffle) is used if we have a
// vector type for which native shuffle is not supported or for marray type.
template <typename T>
using EnableIfNativeShuffle =
std::enable_if_t<detail::is_arithmetic<T>::value &&
!VecTypeIsProhibitedForShuffleEmulation<T>::value &&
!detail::is_marray_v<T>,
T>;
struct NonScalarShuffle
: std::bool_constant<!NativeShuffle<T>::value &&
(detail::is_vec_v<T> || detail::is_marray_v<T>)> {};

template <typename T>
using EnableIfNativeShuffle = std::enable_if_t<NativeShuffle<T>::value, T>;

template <typename T>
using EnableIfNonScalarShuffle =
std::enable_if_t<VecTypeIsProhibitedForShuffleEmulation<T>::value ||
detail::is_marray_v<T>,
T>;
std::enable_if_t<NonScalarShuffle<T>::value, T>;

#else // ifndef __NVPTX__

Expand All @@ -835,13 +842,19 @@ using EnableIfNonScalarShuffle =
// Bitcast shuffles can be implemented using a single SubgroupShuffle
// intrinsic, but require type-punning via an appropriate integer type
#ifndef __NVPTX__

// Use bitcast shuffle for trivially copyable types satisfying size requirements
// that are not handled by native shuffle and non-scalar shuffle.
template <typename T>
using EnableIfBitcastShuffle =
std::enable_if_t<!detail::is_arithmetic<T>::value &&
(std::is_trivially_copyable_v<T> &&
(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
sizeof(T) == 8)),
T>;
struct BitcastShuffle : std::bool_constant<!NativeShuffle<T>::value &&
!NonScalarShuffle<T>::value &&
std::is_trivially_copyable_v<T> &&
(sizeof(T) == 1 || sizeof(T) == 2 ||
sizeof(T) == 4 || sizeof(T) == 8)> {
};

template <typename T>
using EnableIfBitcastShuffle = std::enable_if_t<BitcastShuffle<T>::value, T>;
#else
template <typename T>
using EnableIfBitcastShuffle =
Expand All @@ -860,10 +873,8 @@ using EnableIfBitcastShuffle =
#ifndef __NVPTX__
template <typename T>
using EnableIfGenericShuffle =
std::enable_if_t<!detail::is_arithmetic<T>::value &&
!(std::is_trivially_copyable_v<T> &&
(sizeof(T) == 1 || sizeof(T) == 2 ||
sizeof(T) == 4 || sizeof(T) == 8)),
std::enable_if_t<!NativeShuffle<T>::value && !NonScalarShuffle<T>::value &&
!BitcastShuffle<T>::value,
T>;
#else
template <typename T>
Expand Down
165 changes: 165 additions & 0 deletions sycl/test/check_device_code/group_shuffle.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clangxx -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s
// REQUIRES: linux

#include <sycl/sycl.hpp>

using namespace sycl;
using namespace sycl::ext::oneapi;

// CHECK-LABEL: @_Z13test_shuffle1RN4sycl3_V19sub_groupEPNS0_3vecINS0_3ext6oneapi8bfloat16ELi4EEEm(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::vec", align 8
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::vec", align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::vec", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX]], align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[REF_TMP]]) #[[ATTR5:[0-9]+]]
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META8]]
// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP14_I]], align 1, !noalias [[META8]]
// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]]
// CHECK: for.cond.i.i:
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ]
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 4
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI4EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
// CHECK: for.body.i.i:
// CHECK-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I_I]]
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I_I_I]], align 2, !tbaa [[TBAA14:![0-9]+]], !noalias [[META18:![0-9]+]]
// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6:[0-9]+]], !noalias [[META19:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX_I_I_I12_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I_I]]
// CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I_I_I12_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META18]]
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_3vecINS0_3ext6oneapi8bfloat16ELi4EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META8]]
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[REF_TMP]], align 8
// CHECK-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle1(sycl::sub_group &sg, vec<bfloat16, 4> *buf,
size_t id) {
vec<bfloat16, 4> ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}

// CHECK-LABEL: @_Z13test_shuffle2RN4sycl3_V19sub_groupEPNS0_6marrayINS0_3ext6oneapi8bfloat16ELm4EEEm(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::marray", align 8
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::marray", align 2
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::marray", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[ARRAYIDX]], align 2, !tbaa [[TBAA25:![0-9]+]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]])
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]])
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META26]]
// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_TMP14_I]], align 1, !noalias [[META26]]
// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I_I:%.*]]
// CHECK: arrayinit.body.i.i.i:
// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I_I:%.*]], [[ARRAYINIT_BODY_I_I_I]] ]
// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[ARRAYINIT_CUR_IDX_I_I_I]]
// CHECK-NEXT: store i16 0, ptr [[ARRAYINIT_CUR_PTR_I_I_I]], align 2, !alias.scope [[META32:![0-9]+]]
// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I_I]], 2
// CHECK-NEXT: [[ARRAYINIT_DONE_I_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I_I]], 8
// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I_I]], label [[FOR_COND_I_I:%.*]], label [[ARRAYINIT_BODY_I_I_I]]
// CHECK: for.cond.i.i:
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I_I]] ]
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 4
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM4EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
// CHECK: for.body.i.i:
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I]]
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA14]], !noalias [[META32]]
// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6]], !noalias [[META33:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I]]
// CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META32]]
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP36:![0-9]+]]
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_6marrayINS0_3ext6oneapi8bfloat16ELm4EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META26]]
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[REF_TMP]], align 2, !tbaa [[TBAA25]]
// CHECK-NEXT: store i64 [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 2, !tbaa [[TBAA25]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle2(sycl::sub_group &sg, marray<bfloat16, 4> *buf,
size_t id) {
marray<bfloat16, 4> ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}

// CHECK-LABEL: @_Z13test_shuffle3RN4sycl3_V19sub_groupEPNS0_6marrayINS0_3ext6oneapi8bfloat16ELm5EEEm(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[AGG_TMP14_I:%.*]] = alloca %"class.sycl::_V1::marray.32", align 8
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca %"class.sycl::_V1::marray.32", align 2
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw %"class.sycl::_V1::marray.32", ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 10, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 10, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META38:![0-9]+]]
// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 8 dereferenceable(10) [[AGG_TMP14_I]], ptr addrspace(4) noundef align 2 dereferenceable(10) [[ARRAYIDX]], i64 10, i1 false)
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38]])
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]])
// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I_I:%.*]]
// CHECK: arrayinit.body.i.i.i:
// CHECK-NEXT: [[ARRAYINIT_CUR_IDX_I_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ARRAYINIT_CUR_ADD_I_I_I:%.*]], [[ARRAYINIT_BODY_I_I_I]] ]
// CHECK-NEXT: [[ARRAYINIT_CUR_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[ARRAYINIT_CUR_IDX_I_I_I]]
// CHECK-NEXT: store i16 0, ptr [[ARRAYINIT_CUR_PTR_I_I_I]], align 2, !alias.scope [[META44:![0-9]+]]
// CHECK-NEXT: [[ARRAYINIT_CUR_ADD_I_I_I]] = add nuw nsw i64 [[ARRAYINIT_CUR_IDX_I_I_I]], 2
// CHECK-NEXT: [[ARRAYINIT_DONE_I_I_I:%.*]] = icmp eq i64 [[ARRAYINIT_CUR_ADD_I_I_I]], 10
// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I_I]], label [[FOR_COND_I_I:%.*]], label [[ARRAYINIT_BODY_I_I_I]]
// CHECK: for.cond.i.i:
// CHECK-NEXT: [[S_0_I_I:%.*]] = phi i32 [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ], [ 0, [[ARRAYINIT_BODY_I_I_I]] ]
// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i32 [[S_0_I_I]], 5
// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V117SELECT_FROM_GROUPINS0_9SUB_GROUPENS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM5EEEEENST9ENABLE_IFIXAAOOSR3STDE9IS_SAME_VINST5DECAYIT_E4TYPEES2_ESR4SYCL3EXT6ONEAPI12EXPERIMENTALE27IS_USER_CONSTRUCTED_GROUP_VISC_EOOSR3STDE23IS_TRIVIALLY_COPYABLE_VIT0_ESR6DETAIL6IS_VECISD_EE5VALUEESD_E4TYPEESA_SD_NSA_7ID_TYPEE_EXIT:%.*]]
// CHECK: for.body.i.i:
// CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64
// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [5 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I]]
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA14]], !noalias [[META44]]
// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP0]], i32 noundef 1) #[[ATTR6]], !noalias [[META45:![0-9]+]]
// CHECK-NEXT: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [5 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I]]
// CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META44]]
// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1
// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP48:![0-9]+]]
// CHECK: _ZN4sycl3_V117select_from_groupINS0_9sub_groupENS0_6marrayINS0_3ext6oneapi8bfloat16ELm5EEEEENSt9enable_ifIXaaoosr3stdE9is_same_vINSt5decayIT_E4typeES2_Esr4sycl3ext6oneapi12experimentalE27is_user_constructed_group_vISC_Eoosr3stdE23is_trivially_copyable_vIT0_Esr6detail6is_vecISD_EE5valueESD_E4typeESA_SD_NSA_7id_typeE.exit:
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 10, ptr nonnull [[AGG_TMP14_I]]), !noalias [[META38]]
// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 2 [[ARRAYIDX]], ptr align 2 [[REF_TMP]], i64 10, i1 false), !tbaa.struct [[TBAA_STRUCT49:![0-9]+]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 10, ptr nonnull [[REF_TMP]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle3(sycl::sub_group &sg, marray<bfloat16, 5> *buf,
size_t id) {
marray<bfloat16, 5> ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}

// CHECK-LABEL: @_Z13test_shuffle4RN4sycl3_V19sub_groupEPPim(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr addrspace(4), ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(4) [[TMP0]] to i64
// CHECK-NEXT: [[CALL4_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELImET_S0_j(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]]
// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CALL4_I_I_I]] to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle4(sycl::sub_group &sg, int **buf, size_t id) {
int *ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}

// CHECK-LABEL: @_Z13test_shuffle5RN4sycl3_V19sub_groupEPPVim(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr addrspace(4), ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]]
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]]
// CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(4) [[TMP0]] to i64
// CHECK-NEXT: [[CALL4_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELImET_S0_j(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]]
// CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CALL4_I_I_I]] to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]]
// CHECK-NEXT: ret void
//
SYCL_EXTERNAL void test_shuffle5(sycl::sub_group &sg, volatile int **buf,
size_t id) {
volatile int *ItemVal = buf[id];
buf[id] = sycl::select_from_group(sg, ItemVal, 1);
}