fix: Allow "output" layer in granite moe architecture (convert and cpp)

gabe-l-hart · gabe-l-hart · commit 1c8b3e4c4464 · 2024-09-24T10:24:12.000-06:00
Branch: GraniteMoE

Co-Authored-By: git@compilade.net
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
@@ -1247,6 +1247,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.GRANITE_MOE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
diff --git a/src/llama.cpp b/src/llama.cpp
@@ -1485,6 +1485,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
         {
             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
+            { LLM_TENSOR_OUTPUT,          "output" },
             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
             { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
             { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },

Original file line number	Diff line number	Diff line change
`@@ -1485,6 +1485,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA`
`1485`	`1485`	`{`
`1486`	`1486`	`{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },`
`1487`	`1487`	`{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },`
	`1488`	`+ { LLM_TENSOR_OUTPUT, "output" },`
`1488`	`1489`	`{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },`
`1489`	`1490`	`{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },`
`1490`	`1491`	`{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },`