@@ -292,10 +292,11 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
-            "layers.{bid}.feed_forward.experts.w3",          # mixtral (merged)
-            "transformer.decoder_layer.{bid}.moe.linear_v",  # Grok (merged)
-            "transformer.blocks.{bid}.ffn.experts.mlp.v1",   # dbrx
-            "model.layers.{bid}.mlp.experts.up_proj",        # qwen2moe olmoe (merged)
+            "layers.{bid}.feed_forward.experts.w3",             # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_v",     # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.v1",      # dbrx
+            "model.layers.{bid}.mlp.experts.up_proj",           # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.input_linear", # granitemoe
         ),

         MODEL_TENSOR.FFN_UP_SHEXP: (
@@ -324,10 +325,11 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
-            "layers.{bid}.feed_forward.experts.w1",          # mixtral (merged)
-            "transformer.decoder_layer.{bid}.moe.linear",    # Grok (merged)
-            "transformer.blocks.{bid}.ffn.experts.mlp.w1",   # dbrx
-            "model.layers.{bid}.mlp.experts.gate_proj",      # qwen2moe olmoe (merged)
+            "layers.{bid}.feed_forward.experts.w1",             # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear",       # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w1",      # dbrx
+            "model.layers.{bid}.mlp.experts.gate_proj",         # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
         ),

         MODEL_TENSOR.FFN_GATE_SHEXP: (
@@ -364,10 +366,11 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
-            "layers.{bid}.feed_forward.experts.w2",          # mixtral (merged)
-            "transformer.decoder_layer.{bid}.moe.linear_1",  # Grok (merged)
-            "transformer.blocks.{bid}.ffn.experts.mlp.w2",   # dbrx
-            "model.layers.{bid}.mlp.experts.down_proj",      # qwen2moe olmoe (merged)
+            "layers.{bid}.feed_forward.experts.w2",              # mixtral (merged)
+            "transformer.decoder_layer.{bid}.moe.linear_1",      # Grok (merged)
+            "transformer.blocks.{bid}.ffn.experts.mlp.w2",       # dbrx
+            "model.layers.{bid}.mlp.experts.down_proj",          # qwen2moe olmoe (merged)
+            "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
         ),

         MODEL_TENSOR.FFN_DOWN_SHEXP: (
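
For context on how these entries are consumed: each string is a per-architecture name template whose "{bid}" placeholder is formatted with the block index, so a granitemoe checkpoint tensor can be matched back to the corresponding GGUF tensor. The sketch below is a minimal illustration only; build_reverse_map and the "blk.{bid}.ffn_up_exps" target are assumed for the example and are not part of the diff or the actual TensorNameMap code.

# Minimal sketch (not the actual gguf-py TensorNameMap implementation) of
# expanding "{bid}" templates such as the granitemoe entries above and
# matching them in reverse against checkpoint tensor names.

FFN_UP_EXP_TEMPLATES = (
    "layers.{bid}.feed_forward.experts.w3",             # mixtral (merged)
    "model.layers.{bid}.mlp.experts.up_proj",           # qwen2moe olmoe (merged)
    "model.layers.{bid}.block_sparse_moe.input_linear", # granitemoe
)

def build_reverse_map(templates: tuple, n_blocks: int) -> dict:
    """Expand every {bid} template for each block and map it to an assumed GGUF-side key."""
    mapping = {}
    for bid in range(n_blocks):
        for template in templates:
            # "blk.{bid}.ffn_up_exps" is an illustrative target name, not taken from the diff.
            mapping[template.format(bid=bid)] = "blk.{bid}.ffn_up_exps".format(bid=bid)
    return mapping

if __name__ == "__main__":
    reverse = build_reverse_map(FFN_UP_EXP_TEMPLATES, n_blocks=2)
    # A granitemoe checkpoint tensor name resolves to the shared GGUF-side name.
    print(reverse["model.layers.1.block_sparse_moe.input_linear"])  # -> blk.1.ffn_up_exps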