@@ -11578,11 +11578,8 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
11578
11578
}
11579
11579
char * dst_ptr = (char *) dst;
11580
11580
11581
- const int64_t ne0 = src->ne[0];
11582
- const int64_t nb0 = src->nb[0];
11583
- const int64_t nb1 = src->nb[1];
11584
- const int64_t nb2 = src->nb[2];
11585
- const int64_t nb3 = src->nb[3];
11581
+ GGML_TENSOR_LOCALS_1(int64_t, ne, src, ne);
11582
+ GGML_TENSOR_LOCALS(int64_t, nb, src, nb);
11586
11583
const enum ggml_type type = src->type;
11587
11584
const int64_t ts = ggml_type_size(type);
11588
11585
const int64_t bs = ggml_blck_size(type);
@@ -12426,9 +12423,7 @@ inline void ggml_sycl_op_alibi(const ggml_tensor *src0, const ggml_tensor *src1,
12426
12423
GGML_ASSERT(src0->type == GGML_TYPE_F32);
12427
12424
GGML_ASSERT( dst->type == GGML_TYPE_F32);
12428
12425
12429
- const int64_t ne00 = src0->ne[0];
12430
- const int64_t ne01 = src0->ne[1];
12431
- const int64_t ne02 = src0->ne[2];
12426
+ GGML_TENSOR_LOCALS_3(int64_t, ne0, src0, ne);
12432
12427
const int64_t nrows = ggml_nrows(src0);
12433
12428
12434
12429
//const int n_past = ((int32_t *) dst->op_params)[0];
@@ -12758,15 +12753,9 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
12758
12753
ggml_sycl_op_mul_mat_t op,
12759
12754
const bool convert_src1_to_q8_1) try {
12760
12755
12761
- const int64_t ne00 = src0->ne[0];
12762
- const int64_t ne01 = src0->ne[1];
12763
- const int64_t ne02 = src0->ne[2];
12764
- const int64_t ne03 = src0->ne[3];
12756
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
12765
12757
12766
- const int64_t ne10 = src1->ne[0];
12767
- const int64_t ne11 = src1->ne[1];
12768
- const int64_t ne12 = src1->ne[2];
12769
- const int64_t ne13 = src1->ne[3];
12758
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
12770
12759
const int64_t nrows1 = ggml_nrows(src1);
12771
12760
12772
12761
GGML_ASSERT(ne03 == ne13);
@@ -13337,23 +13326,13 @@ static void ggml_sycl_mul_mat_mat_batched_sycl(const ggml_tensor *src0,
13337
13326
GGML_ASSERT(src0->type == GGML_TYPE_F16);
13338
13327
GGML_ASSERT(src1->type == GGML_TYPE_F32);
13339
13328
13340
- const int64_t ne00 = src0->ne[0]; GGML_UNUSED(ne00);
13341
- const int64_t ne01 = src0->ne[1];
13342
- const int64_t ne02 = src0->ne[2];
13343
- const int64_t ne03 = src0->ne[3];
13329
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
13344
13330
13345
- const int64_t nb01 = src0->nb[1];
13346
- const int64_t nb02 = src0->nb[2]; GGML_UNUSED(nb02);
13347
- const int64_t nb03 = src0->nb[3]; GGML_UNUSED(nb03);
13331
+ GGML_TENSOR_LOCALS(int64_t, nb0, src0, nb);
13348
13332
13349
- const int64_t ne10 = src1->ne[0];
13350
- const int64_t ne11 = src1->ne[1];
13351
- const int64_t ne12 = src1->ne[2];
13352
- const int64_t ne13 = src1->ne[3];
13333
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
13353
13334
13354
- const int64_t nb11 = src1->nb[1];
13355
- const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12);
13356
- const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13);
13335
+ GGML_TENSOR_LOCALS(int64_t, nb1, src1, nb);
13357
13336
13358
13337
const int64_t ne1 = ggml_nelements(src1);
13359
13338
const int64_t ne = ggml_nelements(dst);
@@ -13655,23 +13634,15 @@ static void ggml_sycl_mul_mat_id_sycl(ggml_tensor * dst) {
13655
13634
GGML_ASSERT(src00->backend != GGML_BACKEND_GPU_SPLIT);
13656
13635
GGML_ASSERT(src1->type == GGML_TYPE_F32);
13657
13636
13658
- const int64_t ne00 = src00->ne[0]; GGML_UNUSED(ne00);
13659
- const int64_t ne01 = src00->ne[1];
13660
- const int64_t ne02 = src00->ne[2];
13661
- const int64_t ne03 = src00->ne[3];
13637
+ GGML_TENSOR_LOCALS(int64_t, ne0, src00, ne);
13662
13638
13663
13639
//const int64_t nb01 = src00->nb[1];
13664
- const int64_t nb02 = src00->nb[2]; GGML_UNUSED(nb02);
13665
- const int64_t nb03 = src00->nb[3]; GGML_UNUSED(nb03);
13640
+ GGML_TENSOR_LOCALS(int64_t, nb0, src00, nb);
13666
13641
13667
- const int64_t ne10 = src1->ne[0];
13668
- const int64_t ne11 = src1->ne[1];
13669
- const int64_t ne12 = src1->ne[2];
13670
- const int64_t ne13 = src1->ne[3];
13642
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
13671
13643
13644
+ GGML_TENSOR_LOCALS(int64_t, nb1, src1, nb);
13672
13645
//const int64_t nb11 = src1->nb[1];
13673
- const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12);
13674
- const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13);
13675
13646
13676
13647
const int64_t ne1 = ggml_nelements(src1);
13677
13648
const int64_t ne = ggml_nelements(dst);
@@ -13940,25 +13911,7 @@ static void ggml_sycl_cpy(const ggml_tensor *src0, const ggml_tensor *src1,
13940
13911
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
13941
13912
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
13942
13913
13943
- const int64_t ne00 = src0->ne[0];
13944
- const int64_t ne01 = src0->ne[1];
13945
- const int64_t ne02 = src0->ne[2];
13946
-
13947
-
13948
- const int64_t nb00 = src0->nb[0];
13949
- const int64_t nb01 = src0->nb[1];
13950
- const int64_t nb02 = src0->nb[2];
13951
- const int64_t nb03 = src0->nb[3];
13952
-
13953
- const int64_t ne10 = src1->ne[0];
13954
- const int64_t ne11 = src1->ne[1];
13955
- const int64_t ne12 = src1->ne[2];
13956
-
13957
-
13958
- const int64_t nb10 = src1->nb[0];
13959
- const int64_t nb11 = src1->nb[1];
13960
- const int64_t nb12 = src1->nb[2];
13961
- const int64_t nb13 = src1->nb[3];
13914
+ GGML_TENSOR_BINARY_OP_LOCALS;
13962
13915
13963
13916
SYCL_CHECK(ggml_sycl_set_device(g_main_device));
13964
13917
dpct::queue_ptr main_stream = g_syclStreams[g_main_device_index][0];
0 commit comments