@@ -253,13 +253,14 @@ static void init_model(struct my_llama_model * model) {
253
253
set_param_model (model);
254
254
255
255
// measure data size
256
- struct ggml_allocr * alloc = NULL ;
257
- alloc = ggml_allocr_new_measure (tensor_alignment);
258
- alloc_model (alloc, model);
256
+ size_t size = 0 ;
257
+ for (struct ggml_tensor * t = ggml_get_first_tensor (ctx); t != NULL ; t = ggml_get_next_tensor (ctx, t)) {
258
+ size += GGML_PAD (ggml_nbytes (t), tensor_alignment);
259
+ }
259
260
260
261
// allocate data
261
- model-> data . resize ( ggml_allocr_max_size ( alloc) + tensor_alignment) ;
262
- ggml_allocr_free (alloc );
262
+ struct ggml_allocr * alloc = NULL ;
263
+ model-> data . resize (size + tensor_alignment );
263
264
alloc = ggml_allocr_new (model->data .data (), model->data .size (), tensor_alignment);
264
265
alloc_model (alloc, model);
265
266
ggml_allocr_free (alloc);
@@ -1094,11 +1095,9 @@ int main(int argc, char ** argv) {
1094
1095
struct ggml_tensor * target_probs = ggml_new_tensor_3d (ctx_input, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
1095
1096
1096
1097
// measure required memory for input tensors
1097
- alloc = ggml_allocr_new_measure (tensor_alignment);
1098
- ggml_allocr_alloc (alloc, tokens_input);
1099
- ggml_allocr_alloc (alloc, target_probs);
1100
- size_t max_input_size = ggml_allocr_max_size (alloc) + tensor_alignment;
1101
- ggml_allocr_free (alloc);
1098
+ size_t max_input_size = GGML_PAD (ggml_nbytes (tokens_input), tensor_alignment) +
1099
+ GGML_PAD (ggml_nbytes (target_probs), tensor_alignment) +
1100
+ tensor_alignment;
1102
1101
printf (" %s: input_size = %zu bytes (%.1f MB)\n " , __func__, max_input_size, (float ) max_input_size / (1024 .0f *1024 .0f ));
1103
1102
1104
1103
// allocate input tensors
0 commit comments