Skip to content

Commit 8f685c4

Browse files
[SYCL] Don't value-initialize sycl::vec unnecessarily (#18244)
The change is limited to preview mode, which is where the regression was introduced while re-implementing vec/swizzle.
1 parent 167a4fa commit 8f685c4

File tree

2 files changed

+33
-55
lines changed

2 files changed

+33
-55
lines changed

sycl/include/sycl/vector.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,7 @@ template <typename DataT> class vec_base<DataT, 1> {
287287

288288
protected:
289289
static constexpr int alignment = (std::min)((size_t)64, sizeof(DataType));
290-
alignas(alignment) DataType m_Data{};
290+
alignas(alignment) DataType m_Data;
291291

292292
public:
293293
constexpr vec_base() = default;

sycl/test/check_device_code/vector/convert_bfloat_preview.cpp

Lines changed: 32 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -90,23 +90,12 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec<bfloat16, 3> &inp) {
9090
}
9191

9292
// CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE(
93-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{
93+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{
9494
// CHECK-NEXT: entry:
95-
// CHECK-NEXT: [[RESULT_I:%.*]] = alloca %"class.sycl::_V1::vec.108", align 4
9695
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]])
97-
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[RESULT_I]])
98-
// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I_I:%.*]]
99-
// CHECK: arrayinit.body.i.i.i:
100-
// CHECK-NEXT: [[ARRAYINIT_CUR_I_I_I:%.*]] = phi ptr [ [[RESULT_I]], [[ENTRY:%.*]] ], [ [[ARRAYINIT_NEXT_I_I_I:%.*]], [[ARRAYINIT_BODY_I_I_I]] ]
101-
// CHECK-NEXT: store i32 0, ptr [[ARRAYINIT_CUR_I_I_I]], align 4, !tbaa [[TBAA24:![0-9]+]], !noalias [[META21]]
102-
// CHECK-NEXT: [[ARRAYINIT_NEXT_I_I_I]] = getelementptr inbounds i8, ptr [[ARRAYINIT_CUR_I_I_I]], i64 4
103-
// CHECK-NEXT: [[ARRAYINIT_DONE_I_I_I:%.*]] = icmp eq ptr [[ARRAYINIT_CUR_I_I_I]], [[RESULT_I]]
104-
// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I_I]], label [[_ZNK4SYCL3_V13VECINS0_3EXT6ONEAPI8BFLOAT16ELI1EE7CONVERTIILNS_13ROUNDING_MODEE0EEENS1_IT_LI1EEEV_EXIT:%.*]], label [[ARRAYINIT_BODY_I_I_I]]
105-
// CHECK: _ZNK4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EE7convertIiLNS_13rounding_modeE0EEENS1_IT_Li1EEEv.exit:
10696
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA10]], !noalias [[META21]]
107-
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef i32 @__imf_bfloat162int_rn(i16 noundef zeroext [[TMP0]]) #[[ATTR4]], !noalias [[META21]]
97+
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rn(i16 noundef zeroext [[TMP0]]) #[[ATTR4]], !noalias [[META21]]
10898
// CHECK-NEXT: store i32 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META21]]
109-
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[RESULT_I]])
11099
// CHECK-NEXT: ret void
111100
//
112101
SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec<bfloat16, 1> &inp) {
@@ -118,20 +107,20 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec<bfloat16, 1> &inp) {
118107
// CHECK-NEXT: entry:
119108
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16
120109
// CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
121-
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]])
122-
// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META26]]
123-
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META26]]
124-
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META26]]
110+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]])
111+
// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META24]]
112+
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META24]]
113+
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META24]]
125114
// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4)
126115
// CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4)
127116
// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
128-
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 16, !tbaa [[TBAA10]], !noalias [[META26]]
129-
// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META26]]
130-
// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META26]]
131-
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META26]]
132-
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META26]]
117+
// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 16, !tbaa [[TBAA10]], !noalias [[META24]]
118+
// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META24]]
119+
// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META24]]
120+
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META24]]
121+
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META24]]
133122
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
134-
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META26]]
123+
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META24]]
135124
// CHECK-NEXT: ret void
136125
//
137126
SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec<float, 3> &inp) {
@@ -141,8 +130,8 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec<float, 3> &inp) {
141130
// CHECK-LABEL: define dso_local spir_func void @_Z17TestFtoBFDeviceRZRN4sycl3_V13vecIfLi3EEE(
142131
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{
143132
// CHECK-NEXT: entry:
144-
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]])
145-
// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META29]]
133+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]])
134+
// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META27]]
146135
// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
147136
// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]]
148137
// CHECK: for.cond.i.i.i:
@@ -152,13 +141,13 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec<float, 3> &inp) {
152141
// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIFLI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]]
153142
// CHECK: for.body.i.i.i:
154143
// CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x float> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]]
155-
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_float2bfloat16_rz(float noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META29]]
144+
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_float2bfloat16_rz(float noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META27]]
156145
// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]]
157146
// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1
158-
// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP32:![0-9]+]]
147+
// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP30:![0-9]+]]
159148
// CHECK: _ZNK4sycl3_V13vecIfLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit:
160149
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
161-
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META29]]
150+
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META27]]
162151
// CHECK-NEXT: ret void
163152
//
164153
SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec<float, 3> &inp) {
@@ -168,8 +157,8 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec<float, 3> &inp) {
168157
// CHECK-LABEL: define dso_local spir_func void @_Z19TestInttoBFDeviceRZRN4sycl3_V13vecIiLi3EEE(
169158
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{
170159
// CHECK-NEXT: entry:
171-
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]])
172-
// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]]
160+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]])
161+
// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META31]]
173162
// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
174163
// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]]
175164
// CHECK: for.cond.i.i.i:
@@ -179,37 +168,26 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec<float, 3> &inp) {
179168
// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIILI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]]
180169
// CHECK: for.body.i.i.i:
181170
// CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i32> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]]
182-
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_int2bfloat16_rz(i32 noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META33]]
171+
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_int2bfloat16_rz(i32 noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META31]]
183172
// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]]
184173
// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1
185-
// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP36:![0-9]+]]
174+
// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP34:![0-9]+]]
186175
// CHECK: _ZNK4sycl3_V13vecIiLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit:
187176
// CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
188-
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META33]]
177+
// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META31]]
189178
// CHECK-NEXT: ret void
190179
//
191180
SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec<int, 3> &inp) {
192181
return inp.template convert<bfloat16, sycl::rounding_mode::rtz>();
193182
}
194183

195184
// CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE(
196-
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.145") align 2 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{
185+
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.145") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{
197186
// CHECK-NEXT: entry:
198-
// CHECK-NEXT: [[RESULT_I:%.*]] = alloca %"class.sycl::_V1::vec.145", align 2
199-
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]])
200-
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr nonnull [[RESULT_I]])
201-
// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I_I:%.*]]
202-
// CHECK: arrayinit.body.i.i.i:
203-
// CHECK-NEXT: [[ARRAYINIT_CUR_I_I_I:%.*]] = phi ptr [ [[RESULT_I]], [[ENTRY:%.*]] ], [ [[ARRAYINIT_NEXT_I_I_I:%.*]], [[ARRAYINIT_BODY_I_I_I]] ]
204-
// CHECK-NEXT: store i16 0, ptr [[ARRAYINIT_CUR_I_I_I]], align 2, !noalias [[META37]]
205-
// CHECK-NEXT: [[ARRAYINIT_NEXT_I_I_I]] = getelementptr inbounds i8, ptr [[ARRAYINIT_CUR_I_I_I]], i64 2
206-
// CHECK-NEXT: [[ARRAYINIT_DONE_I_I_I:%.*]] = icmp eq ptr [[ARRAYINIT_CUR_I_I_I]], [[RESULT_I]]
207-
// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I_I]], label [[_ZNK4SYCL3_V13VECIXLI1EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE3EEENS1_IT_LI1EEEV_EXIT:%.*]], label [[ARRAYINIT_BODY_I_I_I]]
208-
// CHECK: _ZNK4sycl3_V13vecIxLi1EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE3EEENS1_IT_Li1EEEv.exit:
209-
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA40:![0-9]+]], !noalias [[META37]]
210-
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__imf_ll2bfloat16_ru(i64 noundef [[TMP0]]) #[[ATTR4]], !noalias [[META37]]
211-
// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META37]]
212-
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[RESULT_I]])
187+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]])
188+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA38:![0-9]+]], !noalias [[META35]]
189+
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_ll2bfloat16_ru(i64 noundef [[TMP0]]) #[[ATTR4]], !noalias [[META35]]
190+
// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META35]]
213191
// CHECK-NEXT: ret void
214192
//
215193
SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec<long long, 1> &inp) {
@@ -219,8 +197,8 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec<long long, 1> &inp) {
219197
// CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE(
220198
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.226") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{
221199
// CHECK-NEXT: entry:
222-
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]])
223-
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA10]], !noalias [[META42]]
200+
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]])
201+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA10]], !noalias [[META40]]
224202
// CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]]
225203
// CHECK: for.cond.i.i.i:
226204
// CHECK-NEXT: [[RETVAL1_0_I_I_I:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ]
@@ -229,12 +207,12 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec<long long, 1> &inp) {
229207
// CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECISLI2EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE4EEENS1_IT_LI2EEEV_EXIT:%.*]]
230208
// CHECK: for.body.i.i.i:
231209
// CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <2 x i16> [[TMP0]], i32 [[I_0_I_I_I]]
232-
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_short2bfloat16_rd(i16 noundef signext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META42]]
210+
// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_short2bfloat16_rd(i16 noundef signext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META40]]
233211
// CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <2 x i16> [[RETVAL1_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]]
234212
// CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1
235-
// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP45:![0-9]+]]
213+
// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP43:![0-9]+]]
236214
// CHECK: _ZNK4sycl3_V13vecIsLi2EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE4EEENS1_IT_Li2EEEv.exit:
237-
// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META42]]
215+
// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META40]]
238216
// CHECK-NEXT: ret void
239217
//
240218
SYCL_EXTERNAL auto TestShorttoBFDeviceRTN(vec<short, 2> &inp) {

0 commit comments

Comments
 (0)