@@ -402,6 +402,7 @@ enum llama_file_version {
     LLAMA_FILE_VERSION_GGML,
     LLAMA_FILE_VERSION_GGMF_V1, // added version field and scores in vocab
     LLAMA_FILE_VERSION_GGJT_V1, // added padding
+    LLAMA_FILE_VERSION_GGJT_V2, // changed quantization format
 };

 struct llama_file_loader {
@@ -432,6 +433,8 @@ struct llama_file_loader {
             file_version = LLAMA_FILE_VERSION_GGMF_V1;
         } else if (magic == 'ggjt' && version == 1) {
             file_version = LLAMA_FILE_VERSION_GGJT_V1;
+        } else if (magic == 'ggjt' && version == 2) {
+            file_version = LLAMA_FILE_VERSION_GGJT_V2;
         } else {
             throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
                          magic, version);
@@ -837,8 +840,8 @@ static const char *llama_file_version_name(llama_file_version version) {
     switch (version) {
         case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
         case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
-        case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (latest)";
-        default: LLAMA_ASSERT(false);
+        case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (pre #1305)";
+        case LLAMA_FILE_VERSION_GGJT_V2: return "ggjt v2 (latest)";
     }
 }

@@ -915,6 +918,14 @@ static void llama_model_load_internal(
         fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
     }

+    if (file_version != LLAMA_FILE_VERSION_GGJT_V2) {
+        if (hparams.ftype != LLAMA_FTYPE_ALL_F32 &&
+            hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 &&
+            hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
+            throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1305)");
+        }
+    }
+
     if (vocab_only) {
         return;
     }
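
Note (illustration, not part of the patch): the magic/version pair dispatched on in read_magic() above is simply two consecutive uint32 values at the start of the file (old 'ggml' files omit the version field). A minimal standalone sketch of peeking at that header, assuming a GGJT-style file written by llama.cpp on a typical little-endian host:

#include <cstdint>
#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model-file>\n", argv[0]);
        return 1;
    }
    FILE * fp = fopen(argv[1], "rb");
    if (!fp) {
        perror("fopen");
        return 1;
    }
    uint32_t magic   = 0;
    uint32_t version = 0;
    // 'ggjt' is a multi-character literal; on the compilers llama.cpp targets
    // it evaluates to 0x67676a74, which is what read_magic() compares against.
    if (fread(&magic, sizeof(magic), 1, fp) == 1 &&
        fread(&version, sizeof(version), 1, fp) == 1) {
        // files requantized for this change report version == 2
        printf("magic: %08x, version: %u\n", magic, version);
    }
    fclose(fp);
    return 0;
}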