Skip to content

Commit 1ae304d

Browse files
committed
Add remaining SYCL exception handler to kernel and refactor
1 parent 168fe89 commit 1ae304d

27 files changed

+221
-108
lines changed

ggml/src/ggml-sycl/argmax.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ static void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * d
5252

5353
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
5454
GGML_ASSERT(dst->type == GGML_TYPE_I32);
55-
GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
55+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
5656

5757
const int64_t ncols = dst->src[0]->ne[0];
5858
const int64_t nrows = ggml_nrows(dst->src[0]);

ggml/src/ggml-sycl/argsort.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ static void argsort_f32_i32_sycl(const float * x, int * dst, const int ncols, co
105105
inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
106106
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
107107
GGML_ASSERT(dst->type == GGML_TYPE_I32);
108+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
108109

109110
const int64_t ncols = dst->src[0]->ne[0];
110111
const int64_t nrows = ggml_nrows(dst->src[0]);

ggml/src/ggml-sycl/binbcast.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ inline void ggml_sycl_op_bin_bcast(const ggml_tensor * src0, const ggml_tensor *
233233
}
234234

235235
inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
236+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
237+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
236238
const void * src0_dd = static_cast<void *>(dst->src[0]->data);
237239
const void * src1_dd = static_cast<void *>(dst->src[1]->data);
238240
void * dst_dd = static_cast<void *>(dst->data);
@@ -247,6 +249,8 @@ inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
247249
}
248250

249251
inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
252+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
253+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
250254
const void * src0_dd = static_cast<void *>(dst->src[0]->data);
251255
const void * src1_dd = static_cast<void *>(dst->src[1]->data);
252256
void * dst_dd = static_cast<void *>(dst->data);
@@ -261,6 +265,8 @@ inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
261265
}
262266

263267
inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
268+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
269+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
264270
const void * src0_dd = static_cast<void *>(dst->src[0]->data);
265271
const void * src1_dd = static_cast<void *>(dst->src[1]->data);
266272
void * dst_dd = static_cast<void *>(dst->data);
@@ -275,6 +281,8 @@ inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
275281
}
276282

277283
inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
284+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
285+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
278286
const void * src0_dd = static_cast<void *>(dst->src[0]->data);
279287
const void * src1_dd = static_cast<void *>(dst->src[1]->data);
280288
void * dst_dd = static_cast<void *>(dst->data);
@@ -289,6 +297,7 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
289297
}
290298

291299
inline void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
300+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
292301
const void * src0_d = static_cast<void *>(dst->src[0]->data);
293302
void * dst_d = static_cast<void *>(dst->data);
294303
dpct::queue_ptr main_stream = ctx.stream();

ggml/src/ggml-sycl/clamp.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ static void clamp_f32_sycl(const float * x, float * dst, const float min, const
2323
inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
2424
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
2525
GGML_ASSERT(dst->type == GGML_TYPE_F32);
26-
GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
26+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
2727

2828
float min;
2929
float max;

ggml/src/ggml-sycl/common.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@ bool gpu_has_xmx(sycl::device &dev) {
5252
return dev.has(sycl::aspect::ext_intel_matrix);
5353
}
5454

55+
const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
56+
return GGML_SYCL_NAME "_Split";
57+
58+
GGML_UNUSED(buft);
59+
}
60+
61+
bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer) {
62+
return buffer->buft->iface.get_name == ggml_backend_sycl_split_buffer_type_get_name;
63+
}
64+
5565
int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) {
5666
const int64_t max_range = std::numeric_limits<int>::max();
5767
int64_t sycl_down_blk_size = block_size;

ggml/src/ggml-sycl/common.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,8 @@ typedef void (*ggml_sycl_op_flatten_t)(ggml_backend_sycl_context & ctx, const gg
436436
const queue_ptr &main_stream);
437437

438438
bool gpu_has_xmx(sycl::device &dev);
439+
const char * ggml_backend_sycl_split_buffer_type_get_name(ggml_backend_buffer_type_t buft);
440+
bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer);
439441

440442
// Some backend specific macros
441443
#define GGML_SYCL_TENSOR_BINARY_OP_LOCALS \

ggml/src/ggml-sycl/concat.cpp

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -159,34 +159,34 @@ static void concat_f32_sycl_non_cont(
159159
}
160160

161161
static void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
162-
const ggml_tensor *src0 = dst->src[0];
163-
const ggml_tensor *src1 = dst->src[1];
164-
queue_ptr stream = ctx.stream();
165-
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
166-
167-
const int32_t dim = ((int32_t *)dst->op_params)[0];
168-
169-
if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
170-
const float *src0_d = (const float *)src0->data;
171-
const float *src1_d = (const float *)src1->data;
172-
173-
float *dst_d = (float *)dst->data;
174-
175-
if (dim != 3) {
176-
for (int i3 = 0; i3 < dst->ne[3]; i3++) {
177-
concat_f32_sycl(
178-
src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
179-
dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1],
180-
src0->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], dim, stream);
181-
}
182-
} else {
183-
const size_t size0 = ggml_nbytes(src0);
184-
const size_t size1 = ggml_nbytes(src1);
185-
186-
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
187-
SYCL_CHECK(CHECK_TRY_ERROR(
188-
stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
189-
}
162+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
163+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
164+
const ggml_tensor * src0 = dst->src[0];
165+
const ggml_tensor * src1 = dst->src[1];
166+
queue_ptr stream = ctx.stream();
167+
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
168+
169+
const int32_t dim = ((int32_t *) dst->op_params)[0];
170+
171+
if (ggml_is_contiguous(src0) && ggml_is_contiguous(src1)) {
172+
const float * src0_d = (const float *) src0->data;
173+
const float * src1_d = (const float *) src1->data;
174+
175+
float * dst_d = (float *) dst->data;
176+
177+
if (dim != 3) {
178+
for (int i3 = 0; i3 < dst->ne[3]; i3++) {
179+
concat_f32_sycl(src0_d + i3 * (src0->nb[3] / 4), src1_d + i3 * (src1->nb[3] / 4),
180+
dst_d + i3 * (dst->nb[3] / 4), src0->ne[0], src0->ne[1], src0->ne[2], dst->ne[0],
181+
dst->ne[1], dst->ne[2], dim, stream);
182+
}
183+
} else {
184+
const size_t size0 = ggml_nbytes(src0);
185+
const size_t size1 = ggml_nbytes(src1);
186+
187+
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d, src0_d, size0).wait()));
188+
SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(dst_d + size0 / 4, src1_d, size1).wait()));
189+
}
190190
} else
191191
concat_f32_sycl_non_cont(
192192
stream, (const char *)src0->data, (const char *)src1->data,

ggml/src/ggml-sycl/conv.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ static void conv_transpose_1d_f32_f32_sycl(
7171
});
7272
}
7373

74-
void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
74+
static void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
75+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->src[1]->buffer));
76+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
7577
const ggml_tensor *src0 = dst->src[0];
7678
const ggml_tensor *src1 = dst->src[1];
7779
const float * src0_d = (const float *)src0->data;
@@ -97,4 +99,13 @@ void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor
9799
src0->ne[0], src0->ne[1], src0->ne[2],
98100
src1->ne[0], dst->ne[0],
99101
src0_d, src1_d, dst_d, stream);
102+
} catch (const sycl::exception & exc) {
103+
std::cerr << exc.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl;
104+
std::exit(1);
100105
}
106+
107+
void ggml_sycl_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
108+
GGML_SYCL_DEBUG("call %s\n", __func__);
109+
ggml_sycl_op_conv_transpose_1d(ctx, dst);
110+
GGML_SYCL_DEBUG("call %s done\n", __func__);
111+
}

ggml/src/ggml-sycl/conv.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@
1515

1616
#include "common.hpp"
1717

18-
void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
18+
void ggml_sycl_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
1919

2020
#endif // GGML_SYCL_CONV_HPP

ggml/src/ggml-sycl/cpy.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,13 +339,14 @@ static void ggml_cpy_i32_i32_sycl(const char * cx, char * cdst, const int ne, co
339339
}
340340

341341
void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
342+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src1->buffer));
342343
const int64_t ne = ggml_nelements(src0);
343344
GGML_ASSERT(ne == ggml_nelements(src1));
344345

345346
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
346347
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
347348

348-
GGML_SYCL_TENSOR_BINARY_OP_CP_LOCALS;
349+
GGML_SYCL_TENSOR_BINARY_OP_CP_LOCALS
349350

350351
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
351352
queue_ptr main_stream = ctx.stream();

ggml/src/ggml-sycl/diagmask.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static void diag_mask_inf_f32_sycl(const float * x, float * dst, const int ncols
2929
inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) try {
3030
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
3131
GGML_ASSERT(dst->type == GGML_TYPE_F32);
32-
GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
32+
GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(dst->buffer));
3333

3434
const int64_t ne00 = dst->src[0]->ne[0];
3535
const int64_t ne01 = dst->src[0]->ne[1];

0 commit comments

Comments
 (0)