@@ -1460,17 +1460,6 @@ struct llama_mlock {
1460
1460
#endif
1461
1461
};
1462
1462
1463
- // Holds information on a tensor data source location.
1464
- struct llama_tensor_offset {
1465
- uint16_t idx; // source file index
1466
- size_t offs; // tensor data offset in the original file
1467
-
1468
- llama_tensor_offset(uint16_t idx, const char * name, struct gguf_context * gguf_ctx) : idx(idx) {
1469
- const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
1470
- offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
1471
- }
1472
- };
1473
-
1474
1463
static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
1475
1464
std::vector<char> result(8, 0);
1476
1465
const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
@@ -2829,7 +2818,18 @@ struct llama_model_loader {
2829
2818
llama_fver fver;
2830
2819
2831
2820
std::vector<std::unique_ptr<llama_mmap>> mappings;
2832
- std::unordered_map<std::string, struct llama_tensor_offset> tensors_offs; // unified tensor data offset accross files
2821
+
2822
+ // Holds information on a tensor data source location.
2823
+ struct llama_tensor_offset {
2824
+ uint16_t idx; // source file index
2825
+ size_t offs; // tensor data offset in the original file
2826
+
2827
+ llama_tensor_offset(uint16_t idx, const char * name, struct gguf_context * gguf_ctx) : idx(idx) {
2828
+ const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
2829
+ offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
2830
+ }
2831
+ };
2832
+ std::unordered_map<std::string, struct llama_tensor_offset> tensors_offs; // unified tensor data offset across files
2833
2833
2834
2834
std::unordered_map<std::string, struct llama_model_kv_override> kv_overrides;
2835
2835
@@ -2884,7 +2884,7 @@ struct llama_model_loader {
2884
2884
}
2885
2885
get_key(llm_kv(LLM_KV_SPLIT_TENSORS_COUNT), n_tensors);
2886
2886
2887
- char split_prefix[4096 ] = {0};
2887
+ char split_prefix[PATH_MAX ] = {0};
2888
2888
if (!llama_split_prefix(split_prefix, fname.c_str(), fname.size(), idx, n_split)) {
2889
2889
throw std::runtime_error(format("invalid split file: %s", fname.c_str()));
2890
2890
}
0 commit comments