@@ -60,6 +60,9 @@ void matrix_verify_add(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
60
60
auto accA = bufA.get_access <access::mode::read_write>(cgh);
61
61
62
62
<<<<<<< HEAD
63
+ <<<<<<< HEAD
64
+ =======
65
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
63
66
cgh.parallel_for <class add_matrix >(
64
67
r, [accA](nd_item<2 > spmd_item) [[intel::reqd_sub_group_size (SG_SZ)]] {
65
68
const auto global_idx = spmd_item.get_global_id (0 );
@@ -81,6 +84,7 @@ void matrix_verify_add(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
81
84
sg_starty / SG_SZ * TN,
82
85
N, matrix_layout::row_major);
83
86
}); // parallel for
87
+ <<<<<<< HEAD
84
88
=======
85
89
cgh.parallel_for <class add_matrix >(r, [accA](nd_item<2 > spmd_item) {
86
90
const auto global_idx = spmd_item.get_global_id (0 );
@@ -103,6 +107,8 @@ void matrix_verify_add(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
103
107
N, matrix_layout::row_major);
104
108
}); // parallel for
105
109
>>>>>>> 62e420f44 ([SYCL][Matrix] Correct a test case that redefines a class name (#757 ))
110
+ =======
111
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
106
112
}).wait ();
107
113
assert_ops_ref<T, M, N>(bufA.get_access <access::mode::read>(), ref);
108
114
}
@@ -116,6 +122,9 @@ void matrix_verify_sub(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
116
122
auto accA = bufA.get_access <access::mode::read_write>(cgh);
117
123
118
124
<<<<<<< HEAD
125
+ <<<<<<< HEAD
126
+ =======
127
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
119
128
cgh.parallel_for <class sub_matrix >(
120
129
r, [accA](nd_item<2 > spmd_item) [[intel::reqd_sub_group_size (SG_SZ)]] {
121
130
const auto global_idx = spmd_item.get_global_id (0 );
@@ -137,6 +146,7 @@ void matrix_verify_sub(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
137
146
sg_starty / SG_SZ * TN,
138
147
N, matrix_layout::row_major);
139
148
}); // parallel for
149
+ <<<<<<< HEAD
140
150
=======
141
151
cgh.parallel_for <class sub_matrix >(r, [accA](nd_item<2 > spmd_item) {
142
152
const auto global_idx = spmd_item.get_global_id (0 );
@@ -159,6 +169,8 @@ void matrix_verify_sub(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
159
169
N, matrix_layout::row_major);
160
170
}); // parallel for
161
171
>>>>>>> 62e420f44 ([SYCL][Matrix] Correct a test case that redefines a class name (#757 ))
172
+ =======
173
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
162
174
}).wait ();
163
175
assert_ops_ref<T, M, N>(bufA.get_access <access::mode::read>(), ref);
164
176
}
@@ -172,6 +184,9 @@ void matrix_verify_mul(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
172
184
auto accA = bufA.get_access <access::mode::read_write>(cgh);
173
185
174
186
<<<<<<< HEAD
187
+ <<<<<<< HEAD
188
+ =======
189
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
175
190
cgh.parallel_for <class mul_matrix >(
176
191
r, [accA](nd_item<2 > spmd_item) [[intel::reqd_sub_group_size (SG_SZ)]] {
177
192
const auto global_idx = spmd_item.get_global_id (0 );
@@ -193,6 +208,7 @@ void matrix_verify_mul(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
193
208
sg_starty / SG_SZ * TN,
194
209
N, matrix_layout::row_major);
195
210
}); // parallel for
211
+ <<<<<<< HEAD
196
212
=======
197
213
cgh.parallel_for <class mul_matrix >(r, [accA](nd_item<2 > spmd_item) {
198
214
const auto global_idx = spmd_item.get_global_id (0 );
@@ -215,6 +231,8 @@ void matrix_verify_mul(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
215
231
N, matrix_layout::row_major);
216
232
}); // parallel for
217
233
>>>>>>> 62e420f44 ([SYCL][Matrix] Correct a test case that redefines a class name (#757 ))
234
+ =======
235
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
218
236
}).wait ();
219
237
assert_ops_ref<T, M, N>(bufA.get_access <access::mode::read>(), ref);
220
238
}
@@ -228,6 +246,9 @@ void matrix_verify_div(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
228
246
auto accA = bufA.get_access <access::mode::read_write>(cgh);
229
247
230
248
<<<<<<< HEAD
249
+ <<<<<<< HEAD
250
+ =======
251
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
231
252
cgh.parallel_for <class div_matrix >(
232
253
r, [accA](nd_item<2 > spmd_item) [[intel::reqd_sub_group_size (SG_SZ)]] {
233
254
const auto global_idx = spmd_item.get_global_id (0 );
@@ -249,6 +270,7 @@ void matrix_verify_div(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
249
270
sg_starty / SG_SZ * TN,
250
271
N, matrix_layout::row_major);
251
272
}); // parallel for
273
+ <<<<<<< HEAD
252
274
=======
253
275
cgh.parallel_for <class div_matrix >(r, [accA](nd_item<2 > spmd_item) {
254
276
const auto global_idx = spmd_item.get_global_id (0 );
@@ -271,6 +293,8 @@ void matrix_verify_div(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
271
293
N, matrix_layout::row_major);
272
294
}); // parallel for
273
295
>>>>>>> 62e420f44 ([SYCL][Matrix] Correct a test case that redefines a class name (#757 ))
296
+ =======
297
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
274
298
}).wait ();
275
299
assert_ops_ref<T, M, N>(bufA.get_access <access::mode::read>(), ref);
276
300
}
@@ -284,6 +308,9 @@ void matrix_verify_logic(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
284
308
auto accA = bufA.get_access <access::mode::read_write>(cgh);
285
309
286
310
<<<<<<< HEAD
311
+ <<<<<<< HEAD
312
+ =======
313
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
287
314
cgh.parallel_for <class logic_matrix >(
288
315
r, [accA](nd_item<2 > spmd_item) [[intel::reqd_sub_group_size (SG_SZ)]] {
289
316
const auto global_idx = spmd_item.get_global_id (0 );
@@ -314,6 +341,7 @@ void matrix_verify_logic(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
314
341
}
315
342
wi_slice_a[i] = val;
316
343
}
344
+ <<<<<<< HEAD
317
345
=======
318
346
cgh.parallel_for <class logic_matrix >(r, [accA](nd_item<2 > spmd_item) {
319
347
const auto global_idx = spmd_item.get_global_id (0 );
@@ -342,6 +370,8 @@ void matrix_verify_logic(queue q, big_matrix<T, M, N> &A, nd_range<2> &r,
342
370
} else {
343
371
val += 2 ;
344
372
>>>>>>> 62e420f44 ([SYCL][Matrix] Correct a test case that redefines a class name (#757 ))
373
+ =======
374
+ >>>>>>> 7bb961a5c ([SYCL][Matrix] Add missing explicit SG size statement (#764 ))
345
375
}
346
376
}
347
377
joint_matrix_store (sg, sub_a,
0 commit comments