feat: instance type variants for environment variables

evakravi · evakravi · commit 6ca662737f74 · 2023-09-20T15:34:40.000Z
diff --git a/src/sagemaker/environment_variables.py b/src/sagemaker/environment_variables.py
@@ -20,6 +20,7 @@
 from sagemaker.jumpstart import utils as jumpstart_utils
 from sagemaker.jumpstart import artifacts
 from sagemaker.jumpstart.constants import DEFAULT_JUMPSTART_SAGEMAKER_SESSION
+from sagemaker.jumpstart.enums import JumpStartScriptScope
 from sagemaker.session import Session
 
 logger = logging.getLogger(__name__)
@@ -33,6 +34,8 @@ def retrieve_default(
     tolerate_deprecated_model: bool = False,
     include_aws_sdk_env_vars: bool = True,
     sagemaker_session: Session = DEFAULT_JUMPSTART_SAGEMAKER_SESSION,
+    instance_type: Optional[str] = None,
+    script: JumpStartScriptScope = JumpStartScriptScope.INFERENCE,
 ) -> Dict[str, str]:
     """Retrieves the default container environment variables for the model matching the arguments.
 
@@ -58,6 +61,9 @@ def retrieve_default(
             object, used for SageMaker interactions. If not
             specified, one is created using the default AWS configuration
             chain. (Default: sagemaker.jumpstart.constants.DEFAULT_JUMPSTART_SAGEMAKER_SESSION).
+        instance_type (str): An instance type to optionally supply in order to get environment variables
+            specific for the instance type.
+        script (JumpStartScriptScope): The JumpStart script for which to retrieve environment variables.
     Returns:
         dict: The variables to use for the model.
 
@@ -78,4 +84,6 @@ def retrieve_default(
         tolerate_deprecated_model,
         include_aws_sdk_env_vars,
         sagemaker_session=sagemaker_session,
+        instance_type=instance_type,
+        script=script,
     )
diff --git a/src/sagemaker/jumpstart/artifacts/environment_variables.py b/src/sagemaker/jumpstart/artifacts/environment_variables.py
@@ -34,6 +34,8 @@ def _retrieve_default_environment_variables(
     tolerate_deprecated_model: bool = False,
     include_aws_sdk_env_vars: bool = True,
     sagemaker_session: Session = DEFAULT_JUMPSTART_SAGEMAKER_SESSION,
+    instance_type: Optional[str] = None,
+    script: JumpStartScriptScope = JumpStartScriptScope.INFERENCE,
 ) -> Dict[str, str]:
     """Retrieves the inference environment variables for the model matching the given arguments.
 
@@ -59,6 +61,9 @@ def _retrieve_default_environment_variables(
             object, used for SageMaker interactions. If not
             specified, one is created using the default AWS configuration
             chain. (Default: sagemaker.jumpstart.constants.DEFAULT_JUMPSTART_SAGEMAKER_SESSION).
+        instance_type (str): An instance type to optionally supply in order to get environment variables
+            specific for the instance type.
+        script (JumpStartScriptScope): The JumpStart script for which to retrieve environment variables.
     Returns:
         dict: the inference environment variables to use for the model.
     """
@@ -69,17 +74,37 @@ def _retrieve_default_environment_variables(
     model_specs = verify_model_region_and_return_specs(
         model_id=model_id,
         version=model_version,
-        scope=JumpStartScriptScope.INFERENCE,
+        scope=script,
         region=region,
         tolerate_vulnerable_model=tolerate_vulnerable_model,
         tolerate_deprecated_model=tolerate_deprecated_model,
         sagemaker_session=sagemaker_session,
     )
 
     default_environment_variables: Dict[str, str] = {}
-    for environment_variable in model_specs.inference_environment_variables:
-        if include_aws_sdk_env_vars or environment_variable.required_for_model_class:
-            default_environment_variables[environment_variable.name] = str(
-                environment_variable.default
+    if script == JumpStartScriptScope.INFERENCE:
+        for environment_variable in model_specs.inference_environment_variables:
+            if include_aws_sdk_env_vars or environment_variable.required_for_model_class:
+                default_environment_variables[environment_variable.name] = str(
+                    environment_variable.default
+                )
+
+    if instance_type:
+        if script == JumpStartScriptScope.INFERENCE and getattr(
+            model_specs, "hosting_instance_type_variants", None
+        ):
+            default_environment_variables.update(
+                model_specs.hosting_instance_type_variants.get_instance_specific_environment_variables(
+                    instance_type
+                )
+            )
+        elif script == JumpStartScriptScope.TRAINING and getattr(
+            model_specs, "training_instance_type_variants", None
+        ):
+            default_environment_variables.update(
+                model_specs.training_instance_type_variants.get_instance_specific_environment_variables(
+                    instance_type
+                )
             )
+
     return default_environment_variables
diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py
@@ -286,6 +286,8 @@ def _add_env_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartModelInitKw
         tolerate_deprecated_model=kwargs.tolerate_deprecated_model,
         tolerate_vulnerable_model=kwargs.tolerate_vulnerable_model,
         sagemaker_session=kwargs.sagemaker_session,
+        script=JumpStartScriptScope.INFERENCE,
+        instance_type=kwargs.instance_type,
     )
 
     for key, value in extra_env_vars.items():
diff --git a/src/sagemaker/jumpstart/types.py b/src/sagemaker/jumpstart/types.py
@@ -346,6 +346,35 @@ def to_json(self) -> Dict[str, Any]:
         json_obj = {att: getattr(self, att) for att in self.__slots__ if hasattr(self, att)}
         return json_obj
 
+    def get_instance_specific_environment_variables(self, instance_type: str) -> Dict[str, str]:
+        """Returns environment variables specific to the instance type, as a new dict.
+
+        Variables keyed by the instance type family are overlaid with those keyed by the exact
+        instance type, which win on conflict. Not all models and images have instance specific
+        environment variables; an empty dict is returned when there are none.
+        """
+
+        if self.variants is None:
+            return {}
+
+        instance_type_family = get_instance_type_family(instance_type)
+        # Family key goes first so that the exact instance type key overrides it on conflict.
+        variant_keys = [instance_type]
+        if instance_type_family not in {"", None}:
+            variant_keys.insert(0, instance_type_family)
+
+        # Accumulate into a fresh dict: updating the dict stored in ``self.variants`` in place
+        # would leak one instance type's overrides into every later lookup for the family.
+        environment_variables: Dict[str, str] = {}
+        for variant_key in variant_keys:
+            environment_variables.update(
+                self.variants.get(variant_key, {})
+                .get("properties", {})
+                .get("environment_variables", {})
+            )
+
+        return environment_variables
+
     def get_image_uri(self, instance_type: str, region: str) -> Optional[str]:
         """Returns image uri from instance type and region.
 
diff --git a/tests/unit/sagemaker/environment_variables/jumpstart/test_default.py b/tests/unit/sagemaker/environment_variables/jumpstart/test_default.py
@@ -19,7 +19,7 @@
 
 from sagemaker import environment_variables
 
-from tests.unit.sagemaker.jumpstart.utils import get_spec_from_base_spec
+from tests.unit.sagemaker.jumpstart.utils import get_spec_from_base_spec, get_special_model_spec
 
 mock_client = boto3.client("s3")
 mock_session = Mock(s3_client=mock_client)
@@ -175,3 +175,76 @@ def test_jumpstart_sdk_environment_variables(patched_get_model_specs):
             model_id=model_id,
             include_aws_sdk_env_vars=False,
         )
+
+
+@patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs")
+def test_jumpstart_sdk_environment_variables_instance_type_overrides(patched_get_model_specs):
+    """Instance-type variants override or extend the model's default environment variables."""
+    patched_get_model_specs.side_effect = get_special_model_spec
+
+    model_id = "env-var-variant-model"
+    region = "us-west-2"
+
+    # exact instance type variant overrides a default: SM_NUM_GPUS becomes "80" instead of "8"
+    env_vars = environment_variables.retrieve_default(
+        region=region,
+        model_id=model_id,
+        model_version="*",
+        include_aws_sdk_env_vars=False,
+        sagemaker_session=mock_session,
+        instance_type="ml.g5.48xlarge",
+    )
+    assert env_vars == {
+        "ENDPOINT_SERVER_TIMEOUT": "3600",
+        "HF_MODEL_ID": "/opt/ml/model",
+        "MAX_INPUT_LENGTH": "1024",
+        "MAX_TOTAL_TOKENS": "2048",
+        "MODEL_CACHE_ROOT": "/opt/ml/model",
+        "SAGEMAKER_ENV": "1",
+        "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
+        "SAGEMAKER_PROGRAM": "inference.py",
+        "SM_NUM_GPUS": "80",
+    }
+
+    # exact instance type variant adds a variable (YODEL) that has no default
+    env_vars = environment_variables.retrieve_default(
+        region=region,
+        model_id=model_id,
+        model_version="*",
+        include_aws_sdk_env_vars=False,
+        sagemaker_session=mock_session,
+        instance_type="ml.p4d.24xlarge",
+    )
+    assert env_vars == {
+        "ENDPOINT_SERVER_TIMEOUT": "3600",
+        "HF_MODEL_ID": "/opt/ml/model",
+        "MAX_INPUT_LENGTH": "1024",
+        "MAX_TOTAL_TOKENS": "2048",
+        "MODEL_CACHE_ROOT": "/opt/ml/model",
+        "SAGEMAKER_ENV": "1",
+        "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
+        "SAGEMAKER_PROGRAM": "inference.py",
+        "SM_NUM_GPUS": "8",
+        "YODEL": "NACEREMA",
+    }
+
+    # unrecognized instance type: only the default environment variables are expected
+    env_vars = environment_variables.retrieve_default(
+        region=region,
+        model_id=model_id,
+        model_version="*",
+        include_aws_sdk_env_vars=False,
+        sagemaker_session=mock_session,
+        instance_type="ml.p002.xlarge",
+    )
+    assert env_vars == {
+        "ENDPOINT_SERVER_TIMEOUT": "3600",
+        "HF_MODEL_ID": "/opt/ml/model",
+        "MAX_INPUT_LENGTH": "1024",
+        "MAX_TOTAL_TOKENS": "2048",
+        "MODEL_CACHE_ROOT": "/opt/ml/model",
+        "SAGEMAKER_ENV": "1",
+        "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
+        "SAGEMAKER_PROGRAM": "inference.py",
+        "SM_NUM_GPUS": "8",
+    }
diff --git a/tests/unit/sagemaker/jumpstart/constants.py b/tests/unit/sagemaker/jumpstart/constants.py
@@ -14,6 +14,166 @@
 
 
 SPECIAL_MODEL_SPECS_DICT = {
+    "env-var-variant-model": {
+        "model_id": "huggingface-llm-falcon-180b-bf16",
+        "url": "https://huggingface.co/tiiuae/falcon-180B",
+        "version": "1.0.0",
+        "min_sdk_version": "2.175.0",
+        "training_supported": False,
+        "incremental_training_supported": False,
+        "hosting_ecr_specs": {
+            "framework": "huggingface-llm",
+            "framework_version": "0.9.3",
+            "py_version": "py39",
+            "huggingface_transformers_version": "4.29.2",
+        },
+        "hosting_artifact_key": "huggingface-infer/infer-huggingface-llm-falcon-180b-bf16.tar.gz",
+        "hosting_script_key": "source-directory-tarballs/huggingface/inference/llm/v1.0.1/sourcedir.tar.gz",
+        "hosting_prepacked_artifact_key": "huggingface-infer/prepack/v1.0.1/infer-prepack"
+        "-huggingface-llm-falcon-180b-bf16.tar.gz",
+        "hosting_prepacked_artifact_version": "1.0.1",
+        "hosting_use_script_uri": False,
+        "inference_vulnerable": False,
+        "inference_dependencies": [],
+        "inference_vulnerabilities": [],
+        "training_vulnerable": False,
+        "training_dependencies": [],
+        "training_vulnerabilities": [],
+        "deprecated": False,
+        "inference_environment_variables": [
+            {
+                "name": "SAGEMAKER_PROGRAM",
+                "type": "text",
+                "default": "inference.py",
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+            {
+                "name": "SAGEMAKER_SUBMIT_DIRECTORY",
+                "type": "text",
+                "default": "/opt/ml/model/code",
+                "scope": "container",
+                "required_for_model_class": False,
+            },
+            {
+                "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
+                "type": "text",
+                "default": "20",
+                "scope": "container",
+                "required_for_model_class": False,
+            },
+            {
+                "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
+                "type": "text",
+                "default": "3600",
+                "scope": "container",
+                "required_for_model_class": False,
+            },
+            {
+                "name": "ENDPOINT_SERVER_TIMEOUT",
+                "type": "int",
+                "default": 3600,
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+            {
+                "name": "MODEL_CACHE_ROOT",
+                "type": "text",
+                "default": "/opt/ml/model",
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+            {
+                "name": "SAGEMAKER_ENV",
+                "type": "text",
+                "default": "1",
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+            {
+                "name": "HF_MODEL_ID",
+                "type": "text",
+                "default": "/opt/ml/model",
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+            {
+                "name": "SM_NUM_GPUS",
+                "type": "text",
+                "default": "8",
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+            {
+                "name": "MAX_INPUT_LENGTH",
+                "type": "text",
+                "default": "1024",
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+            {
+                "name": "MAX_TOTAL_TOKENS",
+                "type": "text",
+                "default": "2048",
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+            {
+                "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
+                "type": "int",
+                "default": 1,
+                "scope": "container",
+                "required_for_model_class": True,
+            },
+        ],
+        "metrics": [],
+        "default_inference_instance_type": "ml.p4de.24xlarge",
+        "supported_inference_instance_types": ["ml.p4de.24xlarge"],
+        "model_kwargs": {},
+        "deploy_kwargs": {
+            "model_data_download_timeout": 3600,
+            "container_startup_health_check_timeout": 3600,
+        },
+        "predictor_specs": {
+            "supported_content_types": ["application/json"],
+            "supported_accept_types": ["application/json"],
+            "default_content_type": "application/json",
+            "default_accept_type": "application/json",
+        },
+        "inference_volume_size": 512,
+        "inference_enable_network_isolation": True,
+        "validation_supported": False,
+        "fine_tuning_supported": False,
+        "resource_name_base": "hf-llm-falcon-180b-bf16",
+        "hosting_instance_type_variants": {
+            "regional_aliases": {
+                "us-west-2": {
+                    "gpu_image_uri": "763104351884.dkr.ecr.us-west-2.amazonaws.com/"
+                    "huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04",
+                    "cpu_image_uri": "867930986793.dkr.us-west-2.amazonaws.com/cpu-blah",
+                }
+            },
+            "variants": {
+                "g4dn": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "g5": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "local_gpu": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "p2": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "p3": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "p3dn": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "p4d": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "p4de": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "p5": {"regional_properties": {"image_uri": "$gpu_ecr_uri_1"}},
+                "ml.g5.48xlarge": {"properties": {"environment_variables": {"SM_NUM_GPUS": "80"}}},
+                "ml.p4d.24xlarge": {
+                    "properties": {
+                        "environment_variables": {
+                            "YODEL": "NACEREMA",
+                        }
+                    }
+                },
+            },
+        },
+    },
     "variant-model": {
         "model_id": "pytorch-ic-mobilenet-v2",
         "url": "https://pytorch.org/hub/pytorch_vision_mobilenet_v2/",
diff --git a/tests/unit/sagemaker/jumpstart/test_types.py b/tests/unit/sagemaker/jumpstart/test_types.py

Original file line number	Diff line number	Diff line change
`@@ -286,6 +286,8 @@ def _add_env_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartModelInitKw`
`286`	`286`	`tolerate_deprecated_model=kwargs.tolerate_deprecated_model,`
`287`	`287`	`tolerate_vulnerable_model=kwargs.tolerate_vulnerable_model,`
`288`	`288`	`sagemaker_session=kwargs.sagemaker_session,`
	`289`	`+ script=JumpStartScriptScope.INFERENCE,`
	`290`	`+ instance_type=kwargs.instance_type,`
`289`	`291`	`)`
`290`	`292`
`291`	`293`	`for key, value in extra_env_vars.items():`