@@ -895,43 +895,6 @@ static void clamp_f32(const float * x, float * dst, const float min, const float
895
895
dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
896
896
}
897
897
898
- template <typename T>
899
- static void im2col_kernel (const float *x, T *dst, int offset_delta,
900
- int IW, int IH, int OW, int KW, int KH,
901
- int pelements, int CHW, int s0, int s1, int p0,
902
- int p1, int d0, int d1,
903
- const sycl::nd_item<3 > &item_ct1) {
904
- const int i = item_ct1.get_local_id (2 ) +
905
- item_ct1.get_group (2 ) * item_ct1.get_local_range (2 );
906
- if (i >= pelements) {
907
- return ;
908
- }
909
-
910
- const int ksize = OW * (KH > 1 ? KW : 1 );
911
- const int kx = i / ksize;
912
- const int kd = kx * ksize;
913
- const int ky = (i - kd) / OW;
914
- const int ix = i % OW;
915
-
916
- const int64_t iiw = ix * s0 + kx * d0 - p0;
917
- const int64_t iih = item_ct1.get_group (1 ) * s1 + ky * d1 - p1;
918
-
919
- const int64_t offset_dst =
920
- (item_ct1.get_group (1 ) * OW + ix) * CHW +
921
- (item_ct1.get_group (0 ) * (KW * KH) + ky * KW + kx);
922
-
923
- if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
924
- dst[offset_dst] =
925
- sycl::vec<float , 1 >(0 .0f )
926
- .convert <sycl::half, sycl::rounding_mode::automatic>()[0 ];
927
- } else {
928
- const int64_t offset_src = item_ct1.get_group (0 ) * offset_delta;
929
- dst[offset_dst] =
930
- sycl::vec<float , 1 >(x[offset_src + iih * IW + iiw])
931
- .convert <sycl::half, sycl::rounding_mode::automatic>()[0 ];
932
- }
933
- }
934
-
935
898
template <typename Ti, typename To>
936
899
static void pool2d_nchw_kernel (
937
900
const int ih, const int iw, const int oh, const int ow,
@@ -2478,47 +2441,6 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens
2478
2441
(void ) src1_dd;
2479
2442
}
2480
2443
2481
- inline void ggml_sycl_op_im2col (ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
2482
- const ggml_tensor *src1, ggml_tensor *dst,
2483
- const float *src0_dd, const float *src1_dd,
2484
- float *dst_dd,
2485
- const queue_ptr &main_stream) {
2486
-
2487
- GGML_ASSERT (src0->type == GGML_TYPE_F16);
2488
- GGML_ASSERT (src1->type == GGML_TYPE_F32);
2489
- GGML_ASSERT ( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);
2490
-
2491
- const int32_t s0 = ((const int32_t *)(dst->op_params ))[0 ];
2492
- const int32_t s1 = ((const int32_t *)(dst->op_params ))[1 ];
2493
- const int32_t p0 = ((const int32_t *)(dst->op_params ))[2 ];
2494
- const int32_t p1 = ((const int32_t *)(dst->op_params ))[3 ];
2495
- const int32_t d0 = ((const int32_t *)(dst->op_params ))[4 ];
2496
- const int32_t d1 = ((const int32_t *)(dst->op_params ))[5 ];
2497
-
2498
- const bool is_2D = ((const int32_t *)(dst->op_params ))[6 ] == 1 ;
2499
-
2500
- const int64_t IC = src1->ne [is_2D ? 2 : 1 ];
2501
- const int64_t IH = is_2D ? src1->ne [1 ] : 1 ;
2502
- const int64_t IW = src1->ne [0 ];
2503
-
2504
- const int64_t KH = is_2D ? src0->ne [1 ] : 1 ;
2505
- const int64_t KW = src0->ne [0 ];
2506
-
2507
- const int64_t OH = is_2D ? dst->ne [2 ] : 1 ;
2508
- const int64_t OW = dst->ne [1 ];
2509
-
2510
- const size_t delta_offset = src1->nb [is_2D ? 2 : 1 ] / 4 ; // nb is byte offset, src is type float32
2511
-
2512
- if (dst->type == GGML_TYPE_F16) {
2513
- im2col_sycl (src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
2514
- } else {
2515
- im2col_sycl (src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
2516
- }
2517
-
2518
- (void ) src0;
2519
- (void ) src0_dd;
2520
- }
2521
-
2522
2444
inline void ggml_sycl_op_sum_rows (ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
2523
2445
const ggml_tensor *src1, ggml_tensor *dst,
2524
2446
const float *src0_dd, const float *src1_dd,
0 commit comments