@@ -1098,11 +1098,11 @@ struct llama_model_loader {
         this->use_mmap = use_mmap;
     }
 
-    void calc_sizes(size_t * ctx_size_p, size_t * mmapped_size_p) const {
-        *ctx_size_p = *mmapped_size_p = 0;
+    void calc_sizes(size_t & ctx_size_p, size_t & mmapped_size_p) const {
+        ctx_size_p = mmapped_size_p = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
-            *ctx_size_p += sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE;
-            *(use_mmap ? mmapped_size_p : ctx_size_p) += lt.size + 16;
+            ctx_size_p += sizeof(struct ggml_tensor) + GGML_OBJECT_SIZE;
+            (use_mmap ? mmapped_size_p : ctx_size_p) += ggml_nbytes_pad(lt.ggml_tensor);
         }
     }
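A side note on the last `+` line above: the hard-coded `lt.size + 16` slack is replaced by `ggml_nbytes_pad(lt.ggml_tensor)`. Assuming that helper rounds a tensor's byte count up to ggml's memory alignment, the arithmetic it stands in for looks roughly like the sketch below (the constant and names are illustrative assumptions, not taken from ggml):

// pad_demo.cpp -- illustrative sketch of round-up-to-alignment padding; hypothetical names.
#include <cstddef>
#include <cstdio>

static const size_t kMemAlign = 16;  // assumed alignment, mirroring the old "+ 16" slack

// Round nbytes up to the next multiple of kMemAlign.
static size_t nbytes_pad_demo(size_t nbytes) {
    return ((nbytes + kMemAlign - 1) / kMemAlign) * kMemAlign;
}

int main() {
    std::printf("%zu -> %zu\n", (size_t) 100, nbytes_pad_demo(100));  // 100 -> 112
    std::printf("%zu -> %zu\n", (size_t) 128, nbytes_pad_demo(128));  // 128 -> 128
    return 0;
}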
@@ -1159,19 +1159,19 @@ struct llama_model_loader {
     }
 
     void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
-        size_t data_size = 0;
-        size_t prefetch_size = 0;
-        size_t lock_size = 0;
+        size_t data_size = 0;
+        size_t lock_size = 0;
+        size_t pref_size = 0; // prefetch
 
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
             if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
-                prefetch_size += lt.size;
+                pref_size += lt.size;
             }
         }
 
         if (use_mmap) {
-            mapping.reset(new llama_mmap(&file_loader->file, prefetch_size, ggml_is_numa()));
+            mapping.reset(new llama_mmap(&file_loader->file, pref_size, ggml_is_numa()));
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
@@ -1404,7 +1404,7 @@ static void llama_model_load_internal(
 
     size_t ctx_size;
     size_t mmapped_size;
-    ml->calc_sizes(&ctx_size, &mmapped_size);
+    ml->calc_sizes(ctx_size, mmapped_size);
     LLAMA_LOG_INFO("%s: ggml ctx size = %7.2f MB\n", __func__, ctx_size/1024.0/1024.0);
 
     // create the ggml context
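The call-site change above (`&ctx_size, &mmapped_size` becoming `ctx_size, mmapped_size`) follows from the new reference signature of calc_sizes: the caller passes the variables directly and the callee writes through the references. A minimal, self-contained sketch of the pattern (the function and values below are hypothetical, for illustration only):

// ref_out_params.cpp -- illustrative sketch of reference out-parameters; not llama.cpp code.
#include <cstddef>
#include <cstdio>

// The callee assigns through the reference parameters instead of dereferencing pointers.
static void calc_sizes_demo(size_t & ctx_size, size_t & mmapped_size) {
    ctx_size     = 336;   // e.g. per-tensor metadata overhead
    mmapped_size = 4096;  // e.g. tensor data that will be memory-mapped
}

int main() {
    size_t ctx_size;
    size_t mmapped_size;
    calc_sizes_demo(ctx_size, mmapped_size);  // no '&' needed at the call site
    std::printf("ctx = %zu, mmapped = %zu\n", ctx_size, mmapped_size);
    return 0;
}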
@@ -3688,7 +3688,7 @@ int llama_apply_lora_from_file_internal(const struct llama_model & model, const
 
     size_t ctx_size;
     size_t mmapped_size;
-    model_loader->calc_sizes(&ctx_size, &mmapped_size);
+    model_loader->calc_sizes(ctx_size, mmapped_size);
     base_buf.resize(ctx_size);
 
     ggml_init_params base_params;