@@ -255,74 +255,75 @@ class GGUFType:
255
255
256
256
257
257
class MODEL_ARCH (IntEnum ):
258
- CLIP_VISION = auto () # dummy arch for clip.cpp
259
- LLAMA = auto ()
260
- LLAMA4 = auto ()
261
- DECI = auto ()
262
- FALCON = auto ()
263
- BAICHUAN = auto ()
264
- GROK = auto ()
265
- GPT2 = auto ()
266
- GPTJ = auto ()
267
- GPTNEOX = auto ()
268
- MPT = auto ()
269
- STARCODER = auto ()
270
- REFACT = auto ()
271
- BERT = auto ()
272
- NOMIC_BERT = auto ()
273
- NOMIC_BERT_MOE = auto ()
274
- JINA_BERT_V2 = auto ()
275
- BLOOM = auto ()
276
- STABLELM = auto ()
277
- QWEN = auto ()
278
- QWEN2 = auto ()
279
- QWEN2MOE = auto ()
280
- QWEN2VL = auto ()
281
- QWEN3 = auto ()
282
- QWEN3MOE = auto ()
283
- PHI2 = auto ()
284
- PHI3 = auto ()
285
- PHIMOE = auto ()
286
- PLAMO = auto ()
287
- CODESHELL = auto ()
288
- ORION = auto ()
289
- INTERNLM2 = auto ()
290
- MINICPM = auto ()
291
- MINICPM3 = auto ()
292
- GEMMA = auto ()
293
- GEMMA2 = auto ()
294
- GEMMA3 = auto ()
295
- STARCODER2 = auto ()
296
- RWKV6 = auto ()
297
- RWKV6QWEN2 = auto ()
298
- RWKV7 = auto ()
299
- ARWKV7 = auto ()
300
- MAMBA = auto ()
301
- XVERSE = auto ()
302
- COMMAND_R = auto ()
303
- COHERE2 = auto ()
304
- DBRX = auto ()
305
- OLMO = auto ()
306
- OLMO2 = auto ()
307
- OLMOE = auto ()
308
- OPENELM = auto ()
309
- ARCTIC = auto ()
310
- DEEPSEEK = auto ()
311
- DEEPSEEK2 = auto ()
312
- CHATGLM = auto ()
313
- GLM4 = auto ()
314
- BITNET = auto ()
315
- T5 = auto ()
316
- T5ENCODER = auto ()
317
- JAIS = auto ()
318
- NEMOTRON = auto ()
319
- EXAONE = auto ()
320
- GRANITE = auto ()
321
- GRANITE_MOE = auto ()
322
- CHAMELEON = auto ()
323
- WAVTOKENIZER_DEC = auto ()
324
- PLM = auto ()
325
- BAILINGMOE = auto ()
258
+ CLIP_VISION = auto () # dummy arch for clip.cpp
259
+ LLAMA = auto ()
260
+ LLAMA4 = auto ()
261
+ DECI = auto ()
262
+ FALCON = auto ()
263
+ BAICHUAN = auto ()
264
+ GROK = auto ()
265
+ GPT2 = auto ()
266
+ GPTJ = auto ()
267
+ GPTNEOX = auto ()
268
+ MPT = auto ()
269
+ STARCODER = auto ()
270
+ REFACT = auto ()
271
+ BERT = auto ()
272
+ NOMIC_BERT = auto ()
273
+ NOMIC_BERT_MOE = auto ()
274
+ JINA_BERT_V2 = auto ()
275
+ BLOOM = auto ()
276
+ STABLELM = auto ()
277
+ QWEN = auto ()
278
+ QWEN2 = auto ()
279
+ QWEN2MOE = auto ()
280
+ QWEN2VL = auto ()
281
+ QWEN3 = auto ()
282
+ QWEN3MOE = auto ()
283
+ PHI2 = auto ()
284
+ PHI3 = auto ()
285
+ PHIMOE = auto ()
286
+ PLAMO = auto ()
287
+ CODESHELL = auto ()
288
+ ORION = auto ()
289
+ INTERNLM2 = auto ()
290
+ MINICPM = auto ()
291
+ MINICPM3 = auto ()
292
+ GEMMA = auto ()
293
+ GEMMA2 = auto ()
294
+ GEMMA3 = auto ()
295
+ STARCODER2 = auto ()
296
+ RWKV6 = auto ()
297
+ RWKV6QWEN2 = auto ()
298
+ RWKV7 = auto ()
299
+ ARWKV7 = auto ()
300
+ MAMBA = auto ()
301
+ XVERSE = auto ()
302
+ COMMAND_R = auto ()
303
+ COHERE2 = auto ()
304
+ DBRX = auto ()
305
+ OLMO = auto ()
306
+ OLMO2 = auto ()
307
+ OLMOE = auto ()
308
+ OPENELM = auto ()
309
+ ARCTIC = auto ()
310
+ DEEPSEEK = auto ()
311
+ DEEPSEEK2 = auto ()
312
+ CHATGLM = auto ()
313
+ GLM4 = auto ()
314
+ BITNET = auto ()
315
+ T5 = auto ()
316
+ T5ENCODER = auto ()
317
+ JAIS = auto ()
318
+ NEMOTRON = auto ()
319
+ EXAONE = auto ()
320
+ GRANITE = auto ()
321
+ GRANITE_MOE = auto ()
322
+ GRANITE_MOE_SHARED = auto ()
323
+ CHAMELEON = auto ()
324
+ WAVTOKENIZER_DEC = auto ()
325
+ PLM = auto ()
326
+ BAILINGMOE = auto ()
326
327
327
328
328
329
class VISION_PROJECTOR_TYPE (IntEnum ):
@@ -512,74 +513,75 @@ class MODEL_TENSOR(IntEnum):
512
513
513
514
514
515
MODEL_ARCH_NAMES : dict [MODEL_ARCH , str ] = {
515
- MODEL_ARCH .CLIP_VISION : "clip" , # dummy arch for clip.cpp
516
- MODEL_ARCH .LLAMA : "llama" ,
517
- MODEL_ARCH .LLAMA4 : "llama4" ,
518
- MODEL_ARCH .DECI : "deci" ,
519
- MODEL_ARCH .FALCON : "falcon" ,
520
- MODEL_ARCH .BAICHUAN : "baichuan" ,
521
- MODEL_ARCH .GROK : "grok" ,
522
- MODEL_ARCH .GPT2 : "gpt2" ,
523
- MODEL_ARCH .GPTJ : "gptj" ,
524
- MODEL_ARCH .GPTNEOX : "gptneox" ,
525
- MODEL_ARCH .MPT : "mpt" ,
526
- MODEL_ARCH .STARCODER : "starcoder" ,
527
- MODEL_ARCH .REFACT : "refact" ,
528
- MODEL_ARCH .BERT : "bert" ,
529
- MODEL_ARCH .NOMIC_BERT : "nomic-bert" ,
530
- MODEL_ARCH .NOMIC_BERT_MOE : "nomic-bert-moe" ,
531
- MODEL_ARCH .JINA_BERT_V2 : "jina-bert-v2" ,
532
- MODEL_ARCH .BLOOM : "bloom" ,
533
- MODEL_ARCH .STABLELM : "stablelm" ,
534
- MODEL_ARCH .QWEN : "qwen" ,
535
- MODEL_ARCH .QWEN2 : "qwen2" ,
536
- MODEL_ARCH .QWEN2MOE : "qwen2moe" ,
537
- MODEL_ARCH .QWEN2VL : "qwen2vl" ,
538
- MODEL_ARCH .QWEN3 : "qwen3" ,
539
- MODEL_ARCH .QWEN3MOE : "qwen3moe" ,
540
- MODEL_ARCH .PHI2 : "phi2" ,
541
- MODEL_ARCH .PHI3 : "phi3" ,
542
- MODEL_ARCH .PHIMOE : "phimoe" ,
543
- MODEL_ARCH .PLAMO : "plamo" ,
544
- MODEL_ARCH .CODESHELL : "codeshell" ,
545
- MODEL_ARCH .ORION : "orion" ,
546
- MODEL_ARCH .INTERNLM2 : "internlm2" ,
547
- MODEL_ARCH .MINICPM : "minicpm" ,
548
- MODEL_ARCH .MINICPM3 : "minicpm3" ,
549
- MODEL_ARCH .GEMMA : "gemma" ,
550
- MODEL_ARCH .GEMMA2 : "gemma2" ,
551
- MODEL_ARCH .GEMMA3 : "gemma3" ,
552
- MODEL_ARCH .STARCODER2 : "starcoder2" ,
553
- MODEL_ARCH .RWKV6 : "rwkv6" ,
554
- MODEL_ARCH .RWKV6QWEN2 : "rwkv6qwen2" ,
555
- MODEL_ARCH .RWKV7 : "rwkv7" ,
556
- MODEL_ARCH .ARWKV7 : "arwkv7" ,
557
- MODEL_ARCH .MAMBA : "mamba" ,
558
- MODEL_ARCH .XVERSE : "xverse" ,
559
- MODEL_ARCH .COMMAND_R : "command-r" ,
560
- MODEL_ARCH .COHERE2 : "cohere2" ,
561
- MODEL_ARCH .DBRX : "dbrx" ,
562
- MODEL_ARCH .OLMO : "olmo" ,
563
- MODEL_ARCH .OLMO2 : "olmo2" ,
564
- MODEL_ARCH .OLMOE : "olmoe" ,
565
- MODEL_ARCH .OPENELM : "openelm" ,
566
- MODEL_ARCH .ARCTIC : "arctic" ,
567
- MODEL_ARCH .DEEPSEEK : "deepseek" ,
568
- MODEL_ARCH .DEEPSEEK2 : "deepseek2" ,
569
- MODEL_ARCH .CHATGLM : "chatglm" ,
570
- MODEL_ARCH .GLM4 : "glm4" ,
571
- MODEL_ARCH .BITNET : "bitnet" ,
572
- MODEL_ARCH .T5 : "t5" ,
573
- MODEL_ARCH .T5ENCODER : "t5encoder" ,
574
- MODEL_ARCH .JAIS : "jais" ,
575
- MODEL_ARCH .NEMOTRON : "nemotron" ,
576
- MODEL_ARCH .EXAONE : "exaone" ,
577
- MODEL_ARCH .GRANITE : "granite" ,
578
- MODEL_ARCH .GRANITE_MOE : "granitemoe" ,
579
- MODEL_ARCH .CHAMELEON : "chameleon" ,
580
- MODEL_ARCH .WAVTOKENIZER_DEC : "wavtokenizer-dec" ,
581
- MODEL_ARCH .PLM : "plm" ,
582
- MODEL_ARCH .BAILINGMOE : "bailingmoe" ,
516
+ MODEL_ARCH .CLIP_VISION : "clip" , # dummy arch for clip.cpp
517
+ MODEL_ARCH .LLAMA : "llama" ,
518
+ MODEL_ARCH .LLAMA4 : "llama4" ,
519
+ MODEL_ARCH .DECI : "deci" ,
520
+ MODEL_ARCH .FALCON : "falcon" ,
521
+ MODEL_ARCH .BAICHUAN : "baichuan" ,
522
+ MODEL_ARCH .GROK : "grok" ,
523
+ MODEL_ARCH .GPT2 : "gpt2" ,
524
+ MODEL_ARCH .GPTJ : "gptj" ,
525
+ MODEL_ARCH .GPTNEOX : "gptneox" ,
526
+ MODEL_ARCH .MPT : "mpt" ,
527
+ MODEL_ARCH .STARCODER : "starcoder" ,
528
+ MODEL_ARCH .REFACT : "refact" ,
529
+ MODEL_ARCH .BERT : "bert" ,
530
+ MODEL_ARCH .NOMIC_BERT : "nomic-bert" ,
531
+ MODEL_ARCH .NOMIC_BERT_MOE : "nomic-bert-moe" ,
532
+ MODEL_ARCH .JINA_BERT_V2 : "jina-bert-v2" ,
533
+ MODEL_ARCH .BLOOM : "bloom" ,
534
+ MODEL_ARCH .STABLELM : "stablelm" ,
535
+ MODEL_ARCH .QWEN : "qwen" ,
536
+ MODEL_ARCH .QWEN2 : "qwen2" ,
537
+ MODEL_ARCH .QWEN2MOE : "qwen2moe" ,
538
+ MODEL_ARCH .QWEN2VL : "qwen2vl" ,
539
+ MODEL_ARCH .QWEN3 : "qwen3" ,
540
+ MODEL_ARCH .QWEN3MOE : "qwen3moe" ,
541
+ MODEL_ARCH .PHI2 : "phi2" ,
542
+ MODEL_ARCH .PHI3 : "phi3" ,
543
+ MODEL_ARCH .PHIMOE : "phimoe" ,
544
+ MODEL_ARCH .PLAMO : "plamo" ,
545
+ MODEL_ARCH .CODESHELL : "codeshell" ,
546
+ MODEL_ARCH .ORION : "orion" ,
547
+ MODEL_ARCH .INTERNLM2 : "internlm2" ,
548
+ MODEL_ARCH .MINICPM : "minicpm" ,
549
+ MODEL_ARCH .MINICPM3 : "minicpm3" ,
550
+ MODEL_ARCH .GEMMA : "gemma" ,
551
+ MODEL_ARCH .GEMMA2 : "gemma2" ,
552
+ MODEL_ARCH .GEMMA3 : "gemma3" ,
553
+ MODEL_ARCH .STARCODER2 : "starcoder2" ,
554
+ MODEL_ARCH .RWKV6 : "rwkv6" ,
555
+ MODEL_ARCH .RWKV6QWEN2 : "rwkv6qwen2" ,
556
+ MODEL_ARCH .RWKV7 : "rwkv7" ,
557
+ MODEL_ARCH .ARWKV7 : "arwkv7" ,
558
+ MODEL_ARCH .MAMBA : "mamba" ,
559
+ MODEL_ARCH .XVERSE : "xverse" ,
560
+ MODEL_ARCH .COMMAND_R : "command-r" ,
561
+ MODEL_ARCH .COHERE2 : "cohere2" ,
562
+ MODEL_ARCH .DBRX : "dbrx" ,
563
+ MODEL_ARCH .OLMO : "olmo" ,
564
+ MODEL_ARCH .OLMO2 : "olmo2" ,
565
+ MODEL_ARCH .OLMOE : "olmoe" ,
566
+ MODEL_ARCH .OPENELM : "openelm" ,
567
+ MODEL_ARCH .ARCTIC : "arctic" ,
568
+ MODEL_ARCH .DEEPSEEK : "deepseek" ,
569
+ MODEL_ARCH .DEEPSEEK2 : "deepseek2" ,
570
+ MODEL_ARCH .CHATGLM : "chatglm" ,
571
+ MODEL_ARCH .GLM4 : "glm4" ,
572
+ MODEL_ARCH .BITNET : "bitnet" ,
573
+ MODEL_ARCH .T5 : "t5" ,
574
+ MODEL_ARCH .T5ENCODER : "t5encoder" ,
575
+ MODEL_ARCH .JAIS : "jais" ,
576
+ MODEL_ARCH .NEMOTRON : "nemotron" ,
577
+ MODEL_ARCH .EXAONE : "exaone" ,
578
+ MODEL_ARCH .GRANITE : "granite" ,
579
+ MODEL_ARCH .GRANITE_MOE : "granitemoe" ,
580
+ MODEL_ARCH .GRANITE_MOE_SHARED : "granitemoeshared" ,
581
+ MODEL_ARCH .CHAMELEON : "chameleon" ,
582
+ MODEL_ARCH .WAVTOKENIZER_DEC : "wavtokenizer-dec" ,
583
+ MODEL_ARCH .PLM : "plm" ,
584
+ MODEL_ARCH .BAILINGMOE : "bailingmoe" ,
583
585
}
584
586
585
587
VISION_PROJECTOR_TYPE_NAMES : dict [VISION_PROJECTOR_TYPE , str ] = {
@@ -1894,6 +1896,23 @@ class MODEL_TENSOR(IntEnum):
1894
1896
MODEL_TENSOR .FFN_DOWN_EXP ,
1895
1897
MODEL_TENSOR .FFN_UP_EXP ,
1896
1898
],
1899
+ MODEL_ARCH .GRANITE_MOE_SHARED : [
1900
+ MODEL_TENSOR .TOKEN_EMBD ,
1901
+ MODEL_TENSOR .OUTPUT_NORM ,
1902
+ MODEL_TENSOR .OUTPUT ,
1903
+ MODEL_TENSOR .ATTN_NORM ,
1904
+ MODEL_TENSOR .ATTN_Q ,
1905
+ MODEL_TENSOR .ATTN_K ,
1906
+ MODEL_TENSOR .ATTN_V ,
1907
+ MODEL_TENSOR .ATTN_OUT ,
1908
+ MODEL_TENSOR .FFN_NORM ,
1909
+ MODEL_TENSOR .FFN_GATE_INP ,
1910
+ MODEL_TENSOR .FFN_GATE_EXP ,
1911
+ MODEL_TENSOR .FFN_DOWN_EXP ,
1912
+ MODEL_TENSOR .FFN_UP_EXP ,
1913
+ MODEL_TENSOR .FFN_UP_SHEXP ,
1914
+ MODEL_TENSOR .FFN_DOWN_SHEXP ,
1915
+ ],
1897
1916
MODEL_ARCH .CHAMELEON : [
1898
1917
MODEL_TENSOR .TOKEN_EMBD ,
1899
1918
MODEL_TENSOR .OUTPUT_NORM ,
0 commit comments