Skip to content

Commit d8fa214

Browse files
committed
Add deploy method with validations on configs and instance types
1 parent e2a9b70 commit d8fa214

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

src/sagemaker/djl_inference/defaults.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,10 @@
3636
"roberta",
3737
"bert",
3838
}
39+
40+
# GPU instance-type families accepted for DJL large model deployment.
# DJLLargeModel.deploy() rejects any instance type whose family is not in
# this set (these models require GPU-backed hosting).
ALLOWED_INSTANCE_FAMILIES = {
    "ml.g4",
    "ml.g5",
    "ml.p3",
    "ml.p4",
}

src/sagemaker/djl_inference/model.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ def __init__(
9393
predictor_cls: callable = DJLLargeModelPredictor,
9494
**kwargs,
9595
):
96+
if kwargs.get("model_data") is not None:
97+
raise ValueError("DJLLargeModels do not support the model_data parameter. Please use"
98+
"uncompressed_model_data and ensure the s3 uri points to a folder containing"
99+
"all model artifacts, not a tar.gz file")
96100
super(DJLLargeModel, self).__init__(
97101
None, image_uri, role, entry_point, predictor_cls=predictor_cls, **kwargs
98102
)
@@ -140,6 +144,58 @@ def compile(
140144
):
141145
raise NotImplementedError("DJLLargeModels do not currently support compilation with SageMaker Neo")
142146

147+
def deploy(
    self,
    initial_instance_count=None,
    instance_type=None,
    serializer=None,
    deserializer=None,
    accelerator_type=None,
    endpoint_name=None,
    tags=None,
    kms_key=None,
    wait=True,
    data_capture_config=None,
    async_inference_config=None,
    serverless_inference_config=None,
    volume_size=None,
    model_data_download_timeout=None,
    container_startup_health_check_timeout=None,
    **kwargs,
):
    """Deploy this DJLLargeModel to a SageMaker endpoint.

    Validates that the requested deployment configuration is supported by DJL
    large model hosting, then delegates to the base ``Model.deploy``.

    Args:
        instance_type: EC2 instance type to host the endpoint. Must belong to
            one of ``defaults.ALLOWED_INSTANCE_FAMILIES``. May be omitted only
            when inference recommender results are available on the model.
        accelerator_type: Not supported; any value raises.
        serverless_inference_config: Not supported; any value raises.
        (Remaining parameters are forwarded unchanged to ``Model.deploy``.)

    Returns:
        Whatever the base ``deploy`` returns — a predictor built from
        ``predictor_cls`` when one is set, else ``None``.

    Raises:
        ValueError: If an Elastic Inference accelerator or serverless config is
            supplied, if no instance type can be determined, or if the instance
            type is not a supported GPU family.
    """
    if accelerator_type:
        raise ValueError("DJLLargeModels do not support Elastic Inference Accelerators")
    if serverless_inference_config:
        raise ValueError("DJLLargeModels do not support Serverless Deployment")
    if instance_type is None and not self.inference_recommender_job_results:
        # No explicit instance type and no right_size() result to fall back on.
        # NOTE: adjacent string literals need trailing spaces, otherwise the
        # message runs words together ("right_size()must be run").
        raise ValueError(
            "instance_type must be specified, or inference recommendation from "
            "right_size() must be run to deploy the model. Supported instance "
            f"type families are: {defaults.ALLOWED_INSTANCE_FAMILIES}"
        )
    if instance_type:
        # Prefix match rather than an exact rsplit on the family token: real
        # instance types such as "ml.g4dn.xlarge" split to "ml.g4dn", which
        # would never equal the allowed family "ml.g4".
        if not instance_type.startswith(tuple(defaults.ALLOWED_INSTANCE_FAMILIES)):
            raise ValueError(
                "Invalid instance type. DJLLargeModels only support deployment "
                "to instances with GPUs. Supported instance families are "
                f"{defaults.ALLOWED_INSTANCE_FAMILIES}"
            )

    # Return the base-class result so callers receive the Predictor that
    # predictor_cls produces (the original dropped it).
    return super(DJLLargeModel, self).deploy(
        initial_instance_count=initial_instance_count,
        instance_type=instance_type,
        serializer=serializer,
        deserializer=deserializer,
        accelerator_type=accelerator_type,
        endpoint_name=endpoint_name,
        tags=tags,
        kms_key=kms_key,
        wait=wait,
        data_capture_config=data_capture_config,
        async_inference_config=async_inference_config,
        serverless_inference_config=serverless_inference_config,
        volume_size=volume_size,
        model_data_download_timeout=model_data_download_timeout,
        container_startup_health_check_timeout=container_startup_health_check_timeout,
        **kwargs,
    )
198+
143199
def prepare_container_def(
144200
self,
145201
instance_type=None,

0 commit comments

Comments
 (0)