Skip to content

Commit d197545

Browse files
authored
llama : bump max layers from 256 to 512 (#8530)
* llama : bump max layers from 256 to 512
* llama : replace asserts with exceptions
1 parent be0cfb4 commit d197545

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

include/llama.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
#define LLAMA_FILE_MAGIC_GGSQ 0x67677371u // 'ggsq'
4141

4242
#define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
43-
#define LLAMA_SESSION_VERSION 6
43+
#define LLAMA_SESSION_VERSION 7
4444

4545
#define LLAMA_STATE_SEQ_MAGIC LLAMA_FILE_MAGIC_GGSQ
4646
#define LLAMA_STATE_SEQ_VERSION 1

src/llama.cpp

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@
114114

115115
// bump if necessary
116116
#define LLAMA_MAX_NODES 8192
117-
#define LLAMA_MAX_LAYERS 256
117+
#define LLAMA_MAX_LAYERS 512
118118
#define LLAMA_MAX_EXPERTS 160 // DeepSeekV2
119119

120120
//
@@ -4007,7 +4007,9 @@ struct llama_model_loader {
40074007
throw std::runtime_error(format("%s is not a float32, int32 array", key.c_str()));
40084008
}
40094009

4010-
GGML_ASSERT(arr_info.length <= N_MAX);
4010+
if (arr_info.length > N_MAX) {
4011+
throw std::runtime_error(format("array length %u for key %s exceeds max %u", (uint32_t) arr_info.length, key.c_str(), (uint32_t) N_MAX));
4012+
}
40114013

40124014
std::copy((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length, result.begin());
40134015

@@ -4043,8 +4045,6 @@ struct llama_model_loader {
40434045
// get array of n <= N_MAX elements, or a single element repeated n times
40444046
template<typename T, size_t N_MAX>
40454047
bool get_key_or_arr(const std::string & key, std::array<T, N_MAX> & result, uint32_t n, const bool required = true) {
4046-
GGML_ASSERT(n <= N_MAX);
4047-
40484048
const int kid = gguf_find_key(meta, key.c_str());
40494049

40504050
if (kid < 0) {
@@ -4054,6 +4054,10 @@ struct llama_model_loader {
40544054
return false;
40554055
}
40564056

4057+
if (n > N_MAX) {
4058+
throw std::runtime_error(format("n > N_MAX: %u > %u for key %s", (uint32_t) n, (uint32_t) N_MAX, key.c_str()));
4059+
}
4060+
40574061
if (gguf_get_kv_type(meta, kid) == GGUF_TYPE_ARRAY) {
40584062
struct GGUFMeta::ArrayInfo arr_info =
40594063
GGUFMeta::GKV<GGUFMeta::ArrayInfo>::get_kv(meta, kid);
@@ -19920,7 +19924,7 @@ size_t llama_state_get_size(const struct llama_context * ctx) {
1992019924
);
1992119925

1992219926
// on session change it is very likely that the state size has changed - so we need to update this function
19923-
static_assert(LLAMA_SESSION_VERSION == 6, "So you just bumped the session version - good. But did you remember to update llama_state_get_size?");
19927+
static_assert(LLAMA_SESSION_VERSION == 7, "So you just bumped the session version - good. But did you remember to update llama_state_get_size?");
1992419928

1992519929
return s_total;
1992619930
}

0 commit comments

Comments
 (0)