@@ -2712,9 +2712,12 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {

    "FLASH_ATTN",
    "FLASH_FF",
+
+    "MAP_UNARY",
+    "MAP_BINARY",
};

-static_assert(GGML_OP_COUNT == 36, "GGML_OP_COUNT != 36");
+static_assert(GGML_OP_COUNT == 38, "GGML_OP_COUNT != 38");

static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
    "none",
@@ -2757,9 +2760,12 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {

    "flash_attn(x)",
    "flash_ff(x)",
+
+    "f(x)",
+    "f(x,y)",
};

-static_assert(GGML_OP_COUNT == 36, "GGML_OP_COUNT != 36");
+static_assert(GGML_OP_COUNT == 38, "GGML_OP_COUNT != 38");

static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -3054,9 +3060,11 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
        return NULL;
    }

+    const size_t mem_size = (params.mem_size + GGML_MEM_ALIGN - 1) & ~(GGML_MEM_ALIGN - 1);
+
    *ctx = (struct ggml_context) {
-        /*.mem_size         =*/ params.mem_size,
-        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(params.mem_size),
+        /*.mem_size         =*/ mem_size,
+        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
        /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
        /*.no_alloc         =*/ params.no_alloc,
        /*.n_objects        =*/ 0,
@@ -3066,7 +3074,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
        /*.scratch_save     =*/ { 0, 0, NULL, },
    };

-    GGML_ASSERT(ctx->mem_buffer != NULL); // check for allocation failure
+    GGML_ASSERT(ctx->mem_buffer != NULL);

    ggml_assert_aligned(ctx->mem_buffer);
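The added mem_size line is the standard power-of-two align-up trick: adding GGML_MEM_ALIGN - 1 and masking off the low bits rounds the requested size up to the next multiple of GGML_MEM_ALIGN, so the context size matches what the aligned allocator actually hands back. A standalone sanity check of the expression (ALIGN below is a stand-in for GGML_MEM_ALIGN, which is 16 in this revision):

#include <assert.h>
#include <stddef.h>

#define ALIGN 16 // stand-in for GGML_MEM_ALIGN; must be a power of two

static size_t align_up(size_t n) {
    return (n + ALIGN - 1) & ~((size_t)ALIGN - 1);
}

int main(void) {
    assert(align_up(100) == 112); // rounds up to the next multiple of 16
    assert(align_up(112) == 112); // already-aligned sizes are unchanged
    return 0;
}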
@@ -4905,6 +4913,90 @@ struct ggml_tensor * ggml_flash_ff(
    return result;
}

+// ggml_map_unary
+
+struct ggml_tensor * ggml_map_unary_impl_f32(
+        struct ggml_context        * ctx,
+        struct ggml_tensor         * a,
+        const  ggml_unary_op_f32_t fun,
+        bool   inplace) {
+    bool is_node = false;
+
+    if (!inplace && a->grad) {
+        is_node = true;
+    }
+
+    struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+    *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    result->op   = GGML_OP_MAP_UNARY;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->opt[0] = addr_tensor;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_unary_f32(
+        struct ggml_context        * ctx,
+        struct ggml_tensor         * a,
+        const  ggml_unary_op_f32_t fun) {
+    return ggml_map_unary_impl_f32(ctx, a, fun, false);
+}
+
+struct ggml_tensor * ggml_map_unary_inplace_f32(
+        struct ggml_context        * ctx,
+        struct ggml_tensor         * a,
+        const  ggml_unary_op_f32_t fun) {
+    return ggml_map_unary_impl_f32(ctx, a, fun, true);
+}
+
+// ggml_map_binary
+
+struct ggml_tensor * ggml_map_binary_impl_f32(
+        struct ggml_context         * ctx,
+        struct ggml_tensor          * a,
+        struct ggml_tensor          * b,
+        const  ggml_binary_op_f32_t fun,
+        bool   inplace) {
+    GGML_ASSERT(ggml_are_same_shape(a, b));
+
+    bool is_node = false;
+
+    if (!inplace && (a->grad || b->grad)) {
+        is_node = true;
+    }
+
+    struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+    *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    result->op   = GGML_OP_MAP_BINARY;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->src1 = b;
+    result->opt[0] = addr_tensor;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_binary_f32(
+        struct ggml_context         * ctx,
+        struct ggml_tensor          * a,
+        struct ggml_tensor          * b,
+        const  ggml_binary_op_f32_t fun) {
+    return ggml_map_binary_impl_f32(ctx, a, b, fun, false);
+}
+
+struct ggml_tensor * ggml_map_binary_inplace_f32(
+        struct ggml_context         * ctx,
+        struct ggml_tensor          * a,
+        struct ggml_tensor          * b,
+        const  ggml_binary_op_f32_t fun) {
+    return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
+}
+
////////////////////////////////////////////////////////////////////////////////

void ggml_set_param(
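These constructors are the user-facing half of the feature: they graft an arbitrary element-wise f32 callback into the graph (over one tensor, or a pair of same-shape tensors) without defining a new ggml op. A minimal usage sketch, assuming the matching declarations this commit adds to ggml.h; double_row and the 16 MB pool size are hypothetical:

#include "ggml.h"

// callback with the ggml_unary_op_f32_t shape: row length, output row, input row
static void double_row(const int n, float * dst, const float * src) {
    for (int i = 0; i < n; i++) {
        dst[i] = 2.0f*src[i];
    }
}

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_set_f32(a, 3.0f);

    struct ggml_tensor * b = ggml_map_unary_f32(ctx, a, double_row);

    struct ggml_cgraph gf = ggml_build_forward(b);
    ggml_graph_compute(ctx, &gf);
    // each element of b is now 6.0f

    ggml_free(ctx);
    return 0;
}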
@@ -7507,18 +7599,22 @@ static void ggml_compute_forward_rope_f32(
    // row index used to determine which thread to use
    int ir = 0;

+    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+
    for (int64_t i3 = 0; i3 < ne3; i3++) {
        for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
            const int p = (mode == 0 ? n_past + i2 : i2);
            for (int64_t i1 = 0; i1 < ne1; i1++) {
                if (ir++ < ir0) continue;
                if (ir   > ir1) break;

+                float theta = (float)p;
+
                for (int i0 = 0; i0 < n_dims; i0 += 2) {
-                    const float theta = powf(10000.0, ((float)-i0)/n_dims);
+                    const float cos_theta = cosf(theta);
+                    const float sin_theta = sinf(theta);

-                    const float cos_theta = cosf(p*theta);
-                    const float sin_theta = sinf(p*theta);
+                    theta *= theta_scale;

                    const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
                          float * dst_data  = (float *)((char *)  dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
@@ -7580,18 +7676,22 @@ static void ggml_compute_forward_rope_f16(
    // row index used to determine which thread to use
    int ir = 0;

+    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+
    for (int64_t i3 = 0; i3 < ne3; i3++) {
        for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
            const int p = (mode == 0 ? n_past + i2 : i2);
            for (int64_t i1 = 0; i1 < ne1; i1++) {
                if (ir++ < ir0) continue;
                if (ir   > ir1) break;

+                float theta = (float)p;
+
                for (int i0 = 0; i0 < n_dims; i0 += 2) {
-                    const float theta = powf(10000.0, ((float)-i0)/n_dims);
+                    const float cos_theta = cosf(theta);
+                    const float sin_theta = sinf(theta);

-                    const float cos_theta = cosf(p*theta);
-                    const float sin_theta = sinf(p*theta);
+                    theta *= theta_scale;

                    const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
                          ggml_fp16_t * dst_data  = (ggml_fp16_t *)((char *)  dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
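Both RoPE paths apply the same strength reduction: the old code called powf once per pair of elements and folded in the position p inside cosf/sinf. Since the angle for pair i0 is p * 10000^(-i0/n_dims) and i0 advances by 2, the angles form a geometric progression, so theta can start at p and be multiplied by theta_scale = 10000^(-2/n_dims) once per iteration, leaving a single powf outside all the loops. A standalone sanity check of the recurrence (the n_dims and p values are arbitrary):

#include <assert.h>
#include <math.h>

int main(void) {
    const int   n_dims = 8;
    const float p      = 5.0f;

    // hoisted out of the loops, as in the patch
    const float theta_scale = powf(10000.0f, -2.0f/n_dims);

    float theta = p;
    for (int i0 = 0; i0 < n_dims; i0 += 2) {
        // the pre-patch code computed the angle directly for each pair
        const float direct = p*powf(10000.0f, ((float)-i0)/n_dims);
        assert(fabsf(theta - direct) < 1e-4f);
        theta *= theta_scale;
    }
    return 0;
}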
@@ -8865,6 +8965,111 @@ static void ggml_compute_forward_flash_ff(
    }
}

+// ggml_compute_forward_map_unary
+
+static void ggml_compute_forward_map_unary_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst,
+        const ggml_unary_op_f32_t fun) {
+    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    assert( dst->nb[0] == sizeof(float));
+    assert(src0->nb[0] == sizeof(float));
+
+    for (int i = 0; i < n; i++) {
+        fun(nc,
+                (float *) ((char *)  dst->data + i*( dst->nb[1])),
+                (float *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
+
+static void ggml_compute_forward_map_unary(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst,
+        const ggml_unary_op_f32_t fun) {
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_map_unary_f32(params, src0, dst, fun);
+            } break;
+        case GGML_TYPE_Q4_0:
+        case GGML_TYPE_Q4_1:
+        case GGML_TYPE_I8:
+        case GGML_TYPE_I16:
+        case GGML_TYPE_I32:
+        case GGML_TYPE_F16:
+        case GGML_TYPE_COUNT:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
+// ggml_compute_forward_map_binary
+
+static void ggml_compute_forward_map_binary_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+        struct ggml_tensor * dst,
+        const ggml_binary_op_f32_t fun) {
+    assert(params->ith == 0);
+    assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    assert( dst->nb[0] == sizeof(float));
+    assert(src0->nb[0] == sizeof(float));
+    assert(src1->nb[0] == sizeof(float));
+
+    for (int i = 0; i < n; i++) {
+        fun(nc,
+                (float *) ((char *)  dst->data + i*( dst->nb[1])),
+                (float *) ((char *) src0->data + i*(src0->nb[1])),
+                (float *) ((char *) src1->data + i*(src1->nb[1])));
+    }
+}
+
+
+static void ggml_compute_forward_map_binary(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+        struct ggml_tensor * dst,
+        const ggml_binary_op_f32_t fun) {
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_map_binary_f32(params, src0, src1, dst, fun);
+            } break;
+        case GGML_TYPE_Q4_0:
+        case GGML_TYPE_Q4_1:
+        case GGML_TYPE_I8:
+        case GGML_TYPE_I16:
+        case GGML_TYPE_I32:
+        case GGML_TYPE_F16:
+        case GGML_TYPE_COUNT:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
/////////////////////////////////

static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
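Dispatch (in ggml_compute_forward, below) recovers the callback by reading the opt[0] tensor's data back through a function-pointer type, the mirror image of the store in ggml_map_*_impl_f32. A standalone sketch of that round-trip; the typedef is inferred from how the compute code calls fun (the full commit presumably declares it in ggml.h), the union is a stand-in for the I32 tensor's data buffer, and converting between object and function pointers is technically implementation-defined in ISO C, though it is exactly what the patch relies on:

#include <stdint.h>
#include <stdio.h>

typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);

static void double_row(const int n, float * dst, const float * src) {
    for (int i = 0; i < n; i++) {
        dst[i] = 2.0f*src[i];
    }
}

int main(void) {
    // stand-in for the I32 tensor allocated with
    // sizeof(void *) / sizeof(int32_t) elements
    union { int32_t i32[sizeof(void *) / sizeof(int32_t)]; void * p; } data;

    // store, as ggml_map_unary_impl_f32 does ...
    *((void (**)(void)) &data) = (void (*)(void)) double_row;

    // ... and load, as the GGML_OP_MAP_UNARY dispatch case does
    const ggml_unary_op_f32_t fun = *((ggml_unary_op_f32_t *) &data);

    float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    float y[4];
    fun(4, y, x);
    printf("%g %g %g %g\n", y[0], y[1], y[2], y[3]); // 2 4 6 8
    return 0;
}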
@@ -9014,6 +9219,18 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
            {
                ggml_compute_forward_flash_ff(params, tensor->src0, tensor->src1, tensor->opt[0], tensor->opt[1], tensor->opt[2], tensor);
            } break;
+        case GGML_OP_MAP_UNARY:
+            {
+                const ggml_unary_op_f32_t fun = *((ggml_unary_op_f32_t *)tensor->opt[0]->data);
+                ggml_compute_forward_map_unary(params, tensor->src0, tensor, fun);
+            }
+            break;
+        case GGML_OP_MAP_BINARY:
+            {
+                const ggml_binary_op_f32_t fun = *((ggml_binary_op_f32_t *)tensor->opt[0]->data);
+                ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun);
+            }
+            break;
        case GGML_OP_NONE:
            {
                // nop
@@ -9273,6 +9490,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            {
                GGML_ASSERT(false); // not supported
            } break;
+        case GGML_OP_MAP_UNARY:
+        case GGML_OP_MAP_BINARY:
+            {
+                GGML_ASSERT(false); // not supported
+            } break;
        case GGML_OP_NONE:
            {
                // nop
@@ -9765,6 +9987,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)

                        work_size = MAX(work_size, cur);
                    } break;
+                case GGML_OP_MAP_UNARY:
+                case GGML_OP_MAP_BINARY:
+                    {
+                        node->n_tasks = 1;
+                    } break;
                case GGML_OP_NONE:
                    {
                        node->n_tasks = 1;
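Pinning both map ops to a single task (n_tasks = 1) is consistent with the forward implementation above: ggml_compute_forward_map_binary_f32 asserts params->ith == 0, so the user callback is never sharded across threads.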