huggingface · yuanwu2017 · May 13, 2025 · May 16, 2025 · May 19, 2025 · May 20, 2025
diff --git a/Dockerfile_gaudi b/Dockerfile_gaudi
@@ -127,5 +127,5 @@ ENV OMPI_MCA_btl_vader_single_copy_mechanism NONE
 COPY backends/gaudi/tgi-entrypoint.sh /tgi-entrypoint.sh
 RUN chmod +x /tgi-entrypoint.sh
 
-ENTRYPOINT ["/tgi-entrypoint.sh"]
-CMD ["--json-output"]
+#ENTRYPOINT ["/tgi-entrypoint.sh"]
+#CMD ["--json-output"]
diff --git a/backends/gaudi/server/text_generation_server/models/__init__.py b/backends/gaudi/server/text_generation_server/models/__init__.py
@@ -112,6 +112,9 @@
     from text_generation_server.models.custom_modeling.flash_qwen3_modeling import (
         Qwen3ForCausalLM,
     )
+    from text_generation_server.models.custom_modeling.flash_qwen3_moe_modeling import (
+        Qwen3MoeForCausalLM,
+    )
     from text_generation_server.models.custom_modeling.flash_mistral_modeling import (
         FlashMistralForCausalLM,
     )
@@ -301,7 +304,11 @@ class ModelType(enum.Enum):
         "name": "Qwen 3",
         "url": "https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f",
     }
-
+    QWEN3_MOE = {
+        "type": "qwen3_moe",
+        "name": "Qwen 3 Moe",
+        "url": "https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f",
+    }
     GALACTICA = {
         "type": "galactica",
         "name": "Galactica",
@@ -812,6 +819,18 @@ def get_model(
                 trust_remote_code=trust_remote_code,
                 lora_adapter_ids=lora_adapter_ids,
             )
+        elif model_type == QWEN3_MOE:
+            return FlashCausalLM(
+                model_id=model_id,
+                model_class=Qwen3MoeForCausalLM,
+                revision=revision,
+                quantize=quantize,
+                speculator=speculator,
+                dtype=dtype,
+                kv_cache_dtype=kv_cache_dtype,
+                trust_remote_code=trust_remote_code,
+                lora_adapter_ids=lora_adapter_ids,
+            )
         elif model_type == MLLAMA:
             return FlashMllamaCausalLM(
                 model_id=model_id,