
Commit 130c367

Merge branch 'master' into triton

2 parents f83f52e + 99afcc8 · commit 130c367

File tree: 9 files changed (+301, -46 lines)


src/sagemaker/local/entities.py

Lines changed: 37 additions & 1 deletion

@@ -201,6 +201,7 @@ def __init__(self, container):
        self.end_time = None
        self.environment = None
        self.training_job_name = ""
+       self.output_data_config = None

    def start(self, input_data_config, output_data_config, hyperparameters, environment, job_name):
        """Starts a local training job.
@@ -248,6 +249,7 @@ def start(self, input_data_config, output_data_config, hyperparameters, environm
        self.end_time = datetime.datetime.now()
        self.state = self._COMPLETED
        self.training_job_name = job_name
+       self.output_data_config = output_data_config

    def describe(self):
        """Placeholder docstring"""
@@ -259,6 +261,11 @@ def describe(self):
            "TrainingStartTime": self.start_time,
            "TrainingEndTime": self.end_time,
            "ModelArtifacts": {"S3ModelArtifacts": self.model_artifacts},
+           "OutputDataConfig": self.output_data_config,
+           "Environment": self.environment,
+           "AlgorithmSpecification": {
+               "ContainerEntrypoint": self.container.container_entrypoint,
+           },
        }
        return response

@@ -668,7 +675,12 @@ def start(self, **kwargs):
        from sagemaker.local.pipeline import LocalPipelineExecutor

        execution_id = str(uuid4())
-       execution = _LocalPipelineExecution(execution_id, self.pipeline, **kwargs)
+       execution = _LocalPipelineExecution(
+           execution_id=execution_id,
+           pipeline=self.pipeline,
+           local_session=self.local_session,
+           **kwargs,
+       )

        self._executions[execution_id] = execution
        print(
@@ -689,13 +701,16 @@ def __init__(
        PipelineParameters=None,
        PipelineExecutionDescription=None,
        PipelineExecutionDisplayName=None,
+       local_session=None,
    ):
        from sagemaker.workflow.pipeline import PipelineGraph
+       from sagemaker import LocalSession

        self.pipeline = pipeline
        self.pipeline_execution_name = execution_id
        self.pipeline_execution_description = PipelineExecutionDescription
        self.pipeline_execution_display_name = PipelineExecutionDisplayName
+       self.local_session = local_session or LocalSession()
        self.status = _LocalExecutionStatus.EXECUTING.value
        self.failure_reason = None
        self.creation_time = datetime.datetime.now().timestamp()
@@ -731,6 +746,27 @@ def list_steps(self):
            ]
        }

+   def result(self, step_name: str):
+       """Retrieves the output of the provided step if it is a ``@step`` decorated function.
+
+       Args:
+           step_name (str): The name of the pipeline step.
+       Returns:
+           The step output.
+
+       Raises:
+           ValueError if the provided step is not a ``@step`` decorated function.
+           RuntimeError if the provided step is not in "Completed" status.
+       """
+       from sagemaker.workflow.pipeline import get_function_step_result
+
+       return get_function_step_result(
+           step_name=step_name,
+           step_list=self.list_steps()["PipelineExecutionSteps"],
+           execution_id=self.pipeline_execution_name,
+           sagemaker_session=self.local_session,
+       )
+
    def update_execution_success(self):
        """Mark execution as succeeded."""
        self.status = _LocalExecutionStatus.SUCCEEDED.value
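
For context, here is a minimal usage sketch of what the new local-mode result() enables. The pipeline name, image URI, and role ARN are placeholders, and the @step settings stand in for whatever the integration test's step_settings supplies; this is not code from the commit itself.

from sagemaker.workflow.function_step import step
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import LocalPipelineSession
from sagemaker.workflow.step_outputs import get_step

local_session = LocalPipelineSession()

# Placeholder image/instance settings; local mode runs the step in a local container.
@step(image_uri="my-byoc-image:latest", instance_type="local")
def add(a, b):
    return a + b

step_output = add(1, 2)

pipeline = Pipeline(
    name="MyLocalPipeline",  # placeholder name
    steps=[step_output],
    sagemaker_session=local_session,
)
pipeline.create(role_arn="arn:aws:iam::111111111111:role/DummyRole")  # placeholder role
execution = pipeline.start()

# New in this commit: a local execution can resolve a @step function's return value,
# using the OutputDataConfig/Environment that the local describe() now surfaces.
assert execution.result(step_name=get_step(step_output).name) == 3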

src/sagemaker/remote_function/core/stored_function.py

Lines changed: 1 addition & 1 deletion

@@ -142,7 +142,7 @@ def load_and_invoke(self) -> Any:

        logger.info(
            "Serializing the function return and uploading to %s",
-           s3_path_join(self.func_upload_path, RESULTS_FOLDER),
+           s3_path_join(self.results_upload_path, RESULTS_FOLDER),
        )
        serialization.serialize_obj_to_s3(
            obj=result,

src/sagemaker/serve/utils/types.py

Lines changed: 3 additions & 0 deletions

@@ -40,3 +40,6 @@ def __str__(self) -> str:

    CPU = 1
    GPU = 2
+   INFERENTIA_1 = 3
+   INFERENTIA_2 = 4
+   GRAVITON = 5

src/sagemaker/serve/validations/check_image_and_hardware_type.py

Lines changed: 31 additions & 1 deletion

@@ -17,6 +17,21 @@
}


+INF1_INSTANCE_FAMILIES = {"ml.inf1"}
+INF2_INSTANCE_FAMILIES = {"ml.inf2"}
+
+GRAVITON_INSTANCE_FAMILIES = {
+    "ml.c7g",
+    "ml.m6g",
+    "ml.m6gd",
+    "ml.c6g",
+    "ml.c6gd",
+    "ml.c6gn",
+    "ml.r6g",
+    "ml.r6gd",
+}
+
+
def validate_image_uri_and_hardware(image_uri: str, instance_type: str, model_server: ModelServer):
    """Placeholder docstring"""
    if "xgboost" in image_uri:
@@ -57,6 +72,12 @@ def detect_hardware_type_of_instance(instance_type: str) -> HardwareType:
    instance_family = instance_type.rsplit(".", 1)[0]
    if instance_family in GPU_INSTANCE_FAMILIES:
        return HardwareType.GPU
+   if instance_family in INF1_INSTANCE_FAMILIES:
+       return HardwareType.INFERENTIA_1
+   if instance_family in INF2_INSTANCE_FAMILIES:
+       return HardwareType.INFERENTIA_2
+   if instance_family in GRAVITON_INSTANCE_FAMILIES:
+       return HardwareType.GRAVITON
    return HardwareType.CPU


@@ -67,4 +88,13 @@ def detect_triton_image_hardware_type(image_uri: str) -> HardwareType:

def detect_torchserve_image_hardware_type(image_uri: str) -> HardwareType:
    """Placeholder docstring"""
-   return HardwareType.CPU if "cpu" in image_uri else HardwareType.GPU
+   if "neuronx" in image_uri:
+       return HardwareType.INFERENTIA_2
+   if "neuron" in image_uri:
+       return HardwareType.INFERENTIA_1
+   if "graviton" in image_uri:
+       return HardwareType.GRAVITON
+   if "cpu" in image_uri:
+       return HardwareType.CPU
+
+   return HardwareType.GPU
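
A side note on the new substring checks: the order matters, since "neuronx" contains "neuron" and must therefore be tested first, or every Inferentia2 image would be classified as Inferentia1. The standalone sketch below simply restates that ordering with made-up image URIs; it is an illustration, not the SDK function.

def sketch_detect_torchserve_hardware(image_uri: str) -> str:
    """Illustrative restatement of the substring checks added above."""
    # "neuronx" is a superstring of "neuron", so it has to be checked first.
    if "neuronx" in image_uri:
        return "INFERENTIA_2"
    if "neuron" in image_uri:
        return "INFERENTIA_1"
    if "graviton" in image_uri:
        return "GRAVITON"
    if "cpu" in image_uri:
        return "CPU"
    return "GPU"

# Made-up URIs, for illustration only.
assert sketch_detect_torchserve_hardware("pytorch-inference-neuronx:1.13") == "INFERENTIA_2"
assert sketch_detect_torchserve_hardware("pytorch-inference-graviton:2.0-cpu") == "GRAVITON"
assert sketch_detect_torchserve_hardware("pytorch-inference:2.0-gpu") == "GPU"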

src/sagemaker/workflow/pipeline.py

Lines changed: 73 additions & 33 deletions

@@ -24,7 +24,7 @@
import pytz
from botocore.exceptions import ClientError, WaiterError

-from sagemaker import s3
+from sagemaker import s3, LocalSession
from sagemaker._studio import _append_project_tags
from sagemaker.config import PIPELINE_ROLE_ARN_PATH, PIPELINE_TAGS_PATH
from sagemaker.remote_function.core.serialization import deserialize_obj_from_s3
@@ -973,41 +973,81 @@ def result(self, step_name: str):
        except WaiterError as e:
            if "Waiter encountered a terminal failure state" not in str(e):
                raise
-       step = next(filter(lambda x: x["StepName"] == step_name, self.list_steps()), None)
-       if not step:
-           raise ValueError(f"Invalid step name {step_name}")
-       step_type = next(iter(step["Metadata"]))
-       step_metadata = next(iter(step["Metadata"].values()))
-       if step_type != "TrainingJob":
-           raise ValueError(
-               "This method can only be used on pipeline steps created using " "@step decorator."
-           )

-       job_arn = step_metadata["Arn"]
-       job_name = job_arn.split("/")[-1]
+       return get_function_step_result(
+           step_name=step_name,
+           step_list=self.list_steps(),
+           execution_id=self.arn.split("/")[-1],
+           sagemaker_session=self.sagemaker_session,
+       )

-       describe_training_job_response = self.sagemaker_session.describe_training_job(job_name)
-       container_args = describe_training_job_response["AlgorithmSpecification"][
-           "ContainerEntrypoint"
-       ]
-       if container_args != JOBS_CONTAINER_ENTRYPOINT:
-           raise ValueError(
-               "This method can only be used on pipeline steps created using @step decorator."
-           )
-       s3_output_path = describe_training_job_response["OutputDataConfig"]["S3OutputPath"]

-       job_status = describe_training_job_response["TrainingJobStatus"]
-       if job_status == "Completed":
-           return deserialize_obj_from_s3(
-               sagemaker_session=self.sagemaker_session,
-               s3_uri=s3_path_join(
-                   s3_output_path, self.arn.split("/")[-1], step_name, RESULTS_FOLDER
-               ),
-               hmac_key=describe_training_job_response["Environment"][
-                   "REMOTE_FUNCTION_SECRET_KEY"
-               ],
-           )
-       raise RemoteFunctionError(f"Pipeline step {step_name} is in {job_status} status.")
+def get_function_step_result(
+   step_name: str,
+   step_list: list,
+   execution_id: str,
+   sagemaker_session: Session,
+):
+   """Helper function to retrieve the output of a ``@step`` decorated function.
+
+   Args:
+       step_name (str): The name of the pipeline step.
+       step_list (list): A list of executed pipeline steps of the specified execution.
+       execution_id (str): The specified id of the pipeline execution.
+       sagemaker_session (Session): Session object which manages interactions
+           with Amazon SageMaker APIs and any other AWS services needed.
+   Returns:
+       The step output.
+
+   Raises:
+       ValueError if the provided step is not a ``@step`` decorated function.
+       RemoteFunctionError if the provided step is not in "Completed" status
+   """
+   _ERROR_MSG_OF_WRONG_STEP_TYPE = (
+       "This method can only be used on pipeline steps created using @step decorator."
+   )
+   _ERROR_MSG_OF_STEP_INCOMPLETE = (
+       f"Unable to retrieve step output as the step {step_name} is not in Completed status."
+   )
+
+   step = next(filter(lambda x: x["StepName"] == step_name, step_list), None)
+   if not step:
+       raise ValueError(f"Invalid step name {step_name}")
+
+   if isinstance(sagemaker_session, LocalSession) and not step.get("Metadata", None):
+       # In local mode, if the training job failed,
+       # it's not tracked in LocalSagemakerClient and it's not describable.
+       # Thus, the step Metadata is not set.
+       raise RuntimeError(_ERROR_MSG_OF_STEP_INCOMPLETE)
+
+   step_type = next(iter(step["Metadata"]))
+   step_metadata = next(iter(step["Metadata"].values()))
+   if step_type != "TrainingJob":
+       raise ValueError(_ERROR_MSG_OF_WRONG_STEP_TYPE)
+
+   job_arn = step_metadata["Arn"]
+   job_name = job_arn.split("/")[-1]
+
+   if isinstance(sagemaker_session, LocalSession):
+       describe_training_job_response = sagemaker_session.sagemaker_client.describe_training_job(
+           job_name
+       )
+   else:
+       describe_training_job_response = sagemaker_session.describe_training_job(job_name)
+   container_args = describe_training_job_response["AlgorithmSpecification"]["ContainerEntrypoint"]
+   if container_args != JOBS_CONTAINER_ENTRYPOINT:
+       raise ValueError(_ERROR_MSG_OF_WRONG_STEP_TYPE)
+   s3_output_path = describe_training_job_response["OutputDataConfig"]["S3OutputPath"]
+
+   job_status = describe_training_job_response["TrainingJobStatus"]
+   if job_status == "Completed":
+       return deserialize_obj_from_s3(
+           sagemaker_session=sagemaker_session,
+           s3_uri=s3_path_join(s3_output_path, execution_id, step_name, RESULTS_FOLDER),
+           hmac_key=describe_training_job_response["Environment"]["REMOTE_FUNCTION_SECRET_KEY"],
+       )
+
+   raise RemoteFunctionError(_ERROR_MSG_OF_STEP_INCOMPLETE)


class PipelineGraph:
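
Because get_function_step_result is now a module-level helper in sagemaker.workflow.pipeline, it can in principle be called directly against an existing execution, as in the hedged sketch below (the execution ARN and step name are placeholders; in normal use you would simply call execution.result(step_name)):

from sagemaker import Session
from sagemaker.workflow.pipeline import get_function_step_result

session = Session()
execution_arn = (
    "arn:aws:sagemaker:us-west-2:111111111111:"
    "pipeline/my-pipeline/execution/abc123"  # placeholder ARN
)

# Same shape as _PipelineExecution.list_steps(): the raw PipelineExecutionSteps list
# returned by the ListPipelineExecutionSteps API.
step_list = session.sagemaker_client.list_pipeline_execution_steps(
    PipelineExecutionArn=execution_arn
)["PipelineExecutionSteps"]

output = get_function_step_result(
    step_name="my-step",  # placeholder step name
    step_list=step_list,
    execution_id=execution_arn.split("/")[-1],
    sagemaker_session=session,
)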

tests/integ/sagemaker/workflow/test_step_decorator.py

Lines changed: 1 addition & 1 deletion

@@ -592,7 +592,7 @@ def divide(x, y):
        step_name = execution_steps[0]["StepName"]
        with pytest.raises(RemoteFunctionError) as e:
            execution.result(step_name)
-       assert f"Pipeline step {step_name} is in Failed status." in str(e)
+       assert f"step {step_name} is not in Completed status." in str(e)
    finally:
        try:
            pipeline.delete()

tests/integ/test_local_mode.py

Lines changed: 5 additions & 1 deletion

@@ -24,6 +24,7 @@
import stopit

import tests.integ.lock as lock
+from sagemaker.workflow.step_outputs import get_step
from tests.integ.sagemaker.conftest import _build_container, DOCKERFILE_TEMPLATE
from sagemaker.config import SESSION_DEFAULT_S3_BUCKET_PATH
from sagemaker.utils import resolve_value_from_config
@@ -769,7 +770,7 @@ def test_local_pipeline_with_step_decorator_and_step_dependency(
    )

    @step(**step_settings)
-   def generator() -> tuple:
+   def generator():
        return 3, 4

    @step(**step_settings)
@@ -798,6 +799,9 @@ def sum(a, b):
    pipeline_execution_list_steps_result = execution.list_steps()
    assert len(pipeline_execution_list_steps_result["PipelineExecutionSteps"]) == 2

+   assert execution.result(step_name=get_step(step_output_a).name) == (3, 4)
+   assert execution.result(step_name=get_step(step_output_b).name) == 7
+

@pytest.mark.local_mode
def test_local_pipeline_with_step_decorator_and_pre_exe_script(
