@@ -3847,11 +3847,10 @@ static void concat_f32(const float *x,const float *y, float *dst, const int ne
3847
3847
}
3848
3848
}
3849
3849
3850
- static void upscale_f32(const float *x, float *dst,
3851
- const int nb00, const int nb01, const int nb02, const int nb03,
3852
- const int ne10, const int ne11, const int ne12, const int ne13,
3853
- const float sf0, const float sf1, const float sf2, const float sf3,
3854
- const sycl::nd_item<1> &item_ct1) {
3850
+ static void upscale_f32(const float *x, float *dst, const int nb00, const int nb01,
3851
+ const int nb02, const int nb03, const int ne10, const int ne11,
3852
+ const int ne12, const int ne13, const float sf0, const float sf1,
3853
+ const float sf2, const float sf3, const sycl::nd_item<1> &item_ct1) {
3855
3854
int index = item_ct1.get_local_id(0) +
3856
3855
item_ct1.get_group(0) * item_ct1.get_local_range(0);
3857
3856
if (index >= ne10 * ne11 * ne12 * ne13) {
@@ -10092,11 +10091,10 @@ static void concat_f32_sycl(const float *x, const float *y, float *dst,
10092
10091
});
10093
10092
}
10094
10093
10095
- static void upscale_f32_sycl(const float *x, float *dst,
10096
- const int nb00, const int nb01, const int nb02, const int nb03,
10097
- const int ne10, const int ne11, const int ne12, const int ne13,
10098
- const float sf0, const float sf1, const float sf2, const float sf3,
10099
- dpct::queue_ptr stream) {
10094
+ static void upscale_f32_sycl(const float *x, float *dst, const int nb00, const int nb01,
10095
+ const int nb02, const int nb03, const int ne10, const int ne11,
10096
+ const int ne12, const int ne13, const float sf0, const float sf1,
10097
+ const float sf2, const float sf3, dpct::queue_ptr stream) {
10100
10098
int dst_size = ne10 * ne11 * ne12 * ne13;
10101
10099
int num_blocks = (dst_size + SYCL_UPSCALE_BLOCK_SIZE - 1) / SYCL_UPSCALE_BLOCK_SIZE;
10102
10100
sycl::range<1> gridDim(num_blocks * SYCL_UPSCALE_BLOCK_SIZE);
@@ -14001,7 +13999,9 @@ inline void ggml_sycl_op_upscale(const ggml_tensor *src0,
14001
13999
const float sf2 = (float)dst->ne[2]/src0->ne[2];
14002
14000
const float sf3 = (float)dst->ne[3]/src0->ne[3];
14003
14001
14004
- upscale_f32_sycl(src0_dd, dst_dd, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, main_stream);
14002
+ upscale_f32_sycl(src0_dd, dst_dd, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],
14003
+ dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3,
14004
+ main_stream);
14005
14005
14006
14006
(void) src1;
14007
14007
(void) dst;
0 commit comments