@@ -293,11 +293,10 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
-            "layers.{bid}.feed_forward.experts.w3",                 # mixtral (merged)
-            "transformer.decoder_layer.{bid}.moe.linear_v",         # Grok (merged)
-            "transformer.blocks.{bid}.ffn.experts.mlp.v1",          # dbrx
-            "model.layers.{bid}.mlp.experts.up_proj",               # qwen2moe olmoe (merged)
-            "model.layers.{bid}.block_sparse_moe.input_linear.up",  # granitemoe
+            "layers.{bid}.feed_forward.experts.w3",          # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_v",  # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.v1",   # dbrx
+            "model.layers.{bid}.mlp.experts.up_proj",        # qwen2moe olmoe (merged)
         ),
 
         MODEL_TENSOR.FFN_UP_SHEXP: (
@@ -326,11 +325,10 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
-            "layers.{bid}.feed_forward.experts.w1",                   # mixtral (merged)
-            "transformer.decoder_layer.{bid}.moe.linear",             # Grok (merged)
-            "transformer.blocks.{bid}.ffn.experts.mlp.w1",            # dbrx
-            "model.layers.{bid}.mlp.experts.gate_proj",               # qwen2moe olmoe (merged)
-            "model.layers.{bid}.block_sparse_moe.input_linear.gate",  # granitemoe
+            "layers.{bid}.feed_forward.experts.w1",         # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear",   # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w1",  # dbrx
+            "model.layers.{bid}.mlp.experts.gate_proj",     # qwen2moe olmoe (merged)
         ),
 
         MODEL_TENSOR.FFN_GATE_SHEXP: (
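
For context on what these entries do: each tuple maps a canonical MODEL_TENSOR type to the checkpoint tensor-name templates used by the various source model families, with "{bid}" standing in for the block (layer) index. The sketch below is a minimal, self-contained illustration of that pattern, not the gguf-py implementation; the MODEL_TENSOR enum subset, BLOCK_MAPPINGS table, and build_lookup helper are assumed names introduced only for this example.

# Minimal sketch of a TensorNameMap-style table (assumed names, not the real gguf-py code).
from enum import Enum, auto


class MODEL_TENSOR(Enum):
    FFN_UP_EXP = auto()
    FFN_GATE_EXP = auto()


# Name templates per canonical tensor type; "{bid}" is the block index, as in the diff above.
BLOCK_MAPPINGS: dict[MODEL_TENSOR, tuple[str, ...]] = {
    MODEL_TENSOR.FFN_UP_EXP: (
        "layers.{bid}.feed_forward.experts.w3",    # mixtral (merged)
        "model.layers.{bid}.mlp.experts.up_proj",  # qwen2moe olmoe (merged)
    ),
    MODEL_TENSOR.FFN_GATE_EXP: (
        "layers.{bid}.feed_forward.experts.w1",      # mixtral (merged)
        "model.layers.{bid}.mlp.experts.gate_proj",  # qwen2moe olmoe (merged)
    ),
}


def build_lookup(n_blocks: int) -> dict[str, MODEL_TENSOR]:
    """Expand every template for every block index into a flat name -> tensor-type lookup."""
    lookup: dict[str, MODEL_TENSOR] = {}
    for tensor, templates in BLOCK_MAPPINGS.items():
        for template in templates:
            for bid in range(n_blocks):
                lookup[template.format(bid=bid)] = tensor
    return lookup


# Usage: resolve a source-checkpoint tensor name to its canonical tensor type.
lookup = build_lookup(n_blocks=4)
print(lookup["model.layers.3.mlp.experts.up_proj"])  # MODEL_TENSOR.FFN_UP_EXP
print(lookup["layers.0.feed_forward.experts.w1"])    # MODEL_TENSOR.FFN_GATE_EXP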