@@ -2692,15 +2692,16 @@ static struct ggml_cgraph * llm_build_llama(
 
     // KQ_scale
     struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
+    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
     ggml_allocr_alloc(lctx.alloc, KQ_scale);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         ggml_set_f32(KQ_scale, 1.0f/sqrtf(float(n_embd_head)));
     }
-    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
 
     // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
     struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_tokens, 1);
     offload_func_kq(KQ_mask);
+    ggml_set_name(KQ_mask, "KQ_mask");
     ggml_allocr_alloc(lctx.alloc, KQ_mask);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         float * data = (float *) KQ_mask->data;
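Every hunk in this commit applies the same two fixes: ggml_set_name is moved (or added) so it runs right after the tensor is created, before ggml_allocr_alloc, which means the tensor carries its name even during the allocator's measure pass; and previously unnamed input tensors (KQ_mask, KQ_pos, K_shift) get names at all. Below is a minimal sketch of the resulting create / name / alloc / fill pattern, using only ggml calls that appear in this diff; the wrapper make_kq_scale is hypothetical, introduced just to make the snippet self-contained, and it assumes the headers already pulled in by llama.cpp (math.h for sqrtf, ggml.h, ggml-alloc.h).

// Hypothetical wrapper illustrating the pattern these hunks converge on;
// not a function from llama.cpp.
static struct ggml_tensor * make_kq_scale(struct ggml_context * ctx0,
                                          struct ggml_allocr  * alloc,
                                          int64_t n_embd_head) {
    struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
    // name first: the name is attached even on the measure pass
    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
    ggml_allocr_alloc(alloc, KQ_scale);
    if (!ggml_allocr_is_measure(alloc)) {
        // only write data on the real pass; during measurement the
        // tensor has no backing buffer yet
        ggml_set_f32(KQ_scale, 1.0f/sqrtf(float(n_embd_head)));
    }
    return KQ_scale;
}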
@@ -3081,14 +3082,16 @@ static struct ggml_cgraph * llm_build_baichaun(
 
     // KQ_scale
     struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
+    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
     ggml_allocr_alloc(lctx.alloc, KQ_scale);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         ggml_set_f32(KQ_scale, 1.0f/sqrtf(float(n_embd)/n_head));
     }
-    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
 
     // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
     struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_tokens, 1);
+    offload_func_kq(KQ_mask);
+    ggml_set_name(KQ_mask, "KQ_mask");
     ggml_allocr_alloc(lctx.alloc, KQ_mask);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         float * data = (float *) KQ_mask->data;
@@ -3111,6 +3114,7 @@ static struct ggml_cgraph * llm_build_baichaun(
     // KQ_pos - contains the positions
     struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
     offload_func_kq(KQ_pos);
+    ggml_set_name(KQ_pos, "KQ_pos");
     ggml_allocr_alloc(lctx.alloc, KQ_pos);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         int * data = (int *) KQ_pos->data;
@@ -3123,6 +3127,7 @@ static struct ggml_cgraph * llm_build_baichaun(
     if (do_rope_shift) {
         struct ggml_tensor * K_shift = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_ctx);
         offload_func_kq(K_shift);
+        ggml_set_name(K_shift, "K_shift");
         ggml_allocr_alloc(lctx.alloc, K_shift);
         if (!ggml_allocr_is_measure(lctx.alloc)) {
             int * data = (int *) K_shift->data;
@@ -3487,14 +3492,16 @@ static struct ggml_cgraph * llm_build_falcon(
 
     // KQ_scale
     struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
+    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
     ggml_allocr_alloc(lctx.alloc, KQ_scale);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         ggml_set_f32(KQ_scale, 1.0f/sqrtf(float(n_embd)/n_head));
     }
-    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
 
     // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
     struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_tokens, 1);
+    offload_func_kq(KQ_mask);
+    ggml_set_name(KQ_mask, "KQ_mask");
     ggml_allocr_alloc(lctx.alloc, KQ_mask);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         float * data = (float *) KQ_mask->data;
@@ -3517,6 +3524,7 @@ static struct ggml_cgraph * llm_build_falcon(
     // KQ_pos - contains the positions
     struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens);
     offload_func_kq(KQ_pos);
+    ggml_set_name(KQ_pos, "KQ_pos");
     ggml_allocr_alloc(lctx.alloc, KQ_pos);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         int * data = (int *) KQ_pos->data;
@@ -3529,6 +3537,7 @@ static struct ggml_cgraph * llm_build_falcon(
     if (do_rope_shift) {
         struct ggml_tensor * K_shift = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_ctx);
         offload_func_kq(K_shift);
+        ggml_set_name(K_shift, "K_shift");
         ggml_allocr_alloc(lctx.alloc, K_shift);
         if (!ggml_allocr_is_measure(lctx.alloc)) {
             int * data = (int *) K_shift->data;
@@ -3835,14 +3844,15 @@ static struct ggml_cgraph * llm_build_starcoder(
 
     // KQ_scale
     struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
+    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
     ggml_allocr_alloc(lctx.alloc, KQ_scale);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         ggml_set_f32(KQ_scale, 1.0f/sqrtf(float(n_embd)/n_head));
     }
-    ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
 
     // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
     struct ggml_tensor * KQ_mask = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_kv, n_tokens, 1);
+    ggml_set_name(KQ_mask, "KQ_mask");
     ggml_allocr_alloc(lctx.alloc, KQ_mask);
     if (!ggml_allocr_is_measure(lctx.alloc)) {
         float * data = (float *) KQ_mask->data;
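One reason the names matter beyond log readability: named tensors can be retrieved from the finished graph. The sketch below assumes ggml_graph_get_tensor from ggml.h (it returns the tensor whose name matches, or NULL) and a cgraph gf as returned by one of the llm_build_* functions above; it is an illustration, not code from this commit.

// Illustrative lookup of a named input tensor in a built graph; assumes
// stdio.h is included and `gf` is a struct ggml_cgraph * from llm_build_*.
struct ggml_tensor * mask = ggml_graph_get_tensor(gf, "KQ_mask");
if (mask != NULL) {
    // ne[] holds the tensor's dimensions (n_kv, n_tokens, 1 here)
    printf("%s: %lld x %lld x %lld\n", mask->name,
           (long long) mask->ne[0], (long long) mask->ne[1], (long long) mask->ne[2]);
}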