Merge branch 'master' into fix/jumpstart-amt-tracking

evakravi · web-flow · commit 897365f06fec · 2022-05-19T09:33:00.000-04:00
diff --git a/doc/api/training/sdp_versions/latest.rst b/doc/api/training/sdp_versions/latest.rst
@@ -26,8 +26,8 @@ depending on the version of the library you use.
    <https://docs.aws.amazon.com/sagemaker/latest/dg/data-parallel-use-api.html#data-parallel-use-python-skd-api>`_
    for more information.
 
-Version 1.4.0 (Latest)
-======================
+Version 1.4.0, 1.4.1 (Latest)
+=============================
 
 .. toctree::
    :maxdepth: 1
diff --git a/doc/api/training/sdp_versions/v1.2.x/smd_data_parallel_pytorch.rst b/doc/api/training/sdp_versions/v1.2.x/smd_data_parallel_pytorch.rst
@@ -266,7 +266,7 @@ PyTorch API
       .. note::
 
         The ``no_sync()`` context manager is available from smdistributed-dataparallel v1.2.2.
-        To find the release note, see :ref:`sdp_1.2.2_release_note`.
+        To find the release note, see :ref:`sdp_release_note`.
 
       **Example:**
 
diff --git a/doc/api/training/smd_data_parallel_release_notes/smd_data_parallel_change_log.rst b/doc/api/training/smd_data_parallel_release_notes/smd_data_parallel_change_log.rst
@@ -1,4 +1,4 @@
-.. _sdp_1.2.2_release_note:
+.. _sdp_release_note:
 
 #############
 Release Notes
@@ -7,9 +7,45 @@ Release Notes
 New features, bug fixes, and improvements are regularly made to the SageMaker
 distributed data parallel library.
 
-SageMaker Distributed Data Parallel 1.4.0 Release Notes
+SageMaker Distributed Data Parallel 1.4.1 Release Notes
 =======================================================
 
+*Date: May. 3. 2022*
+
+**Currency Updates**
+
+* Added support for PyTorch 1.11.0
+
+**Known Issues**
+
+* The library currently does not support the PyTorch sub-process groups API (`torch.distributed.new_group <https://pytorch.org/docs/stable/distributed.html#torch.distributed.new_group>`_).
+
+
+**Migration to AWS Deep Learning Containers**
+
+This version passed benchmark testing and is migrated to the following AWS Deep Learning Containers (DLC):
+
+- PyTorch 1.11.0 DLC
+
+  .. code::
+
+    763104351884.dkr.ecr.<region>.amazonaws.com/pytorch-training:1.11.0-gpu-py38-cu113-ubuntu20.04-sagemaker
+
+Binary file of this version of the library for custom container users:
+
+  .. code::
+
+    https://smdataparallel.s3.amazonaws.com/binary/pytorch/1.11.0/cu113/2022-04-14/smdistributed_dataparallel-1.4.1-cp38-cp38-linux_x86_64.whl
+
+
+----
+
+Release History
+===============
+
+SageMaker Distributed Data Parallel 1.4.0 Release Notes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 *Date: Feb. 24. 2022*
 
 **New Features**
@@ -72,11 +108,6 @@ This version passed benchmark testing and is migrated to the following AWS Deep
     763104351884.dkr.ecr.<region>.amazonaws.com/pytorch-training:1.10.2-gpu-py38-cu113-ubuntu20.04-sagemaker
 
 
-----
-
-Release History
-===============
-
 SageMaker Distributed Data Parallel 1.2.2 Release Notes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/api/training/smd_model_parallel_release_notes/smd_model_parallel_change_log.rst b/doc/api/training/smd_model_parallel_release_notes/smd_model_parallel_change_log.rst
@@ -5,9 +5,41 @@ Release Notes
 New features, bug fixes, and improvements are regularly made to the SageMaker
 distributed model parallel library.
 
-SageMaker Distributed Model Parallel 1.8.1 Release Notes
+SageMaker Distributed Model Parallel 1.9.0 Release Notes
 ========================================================
 
+*Date: May. 3. 2022*
+
+**Currency Updates**
+
+* Added support for PyTorch 1.11.0
+
+**Migration to AWS Deep Learning Containers**
+
+This version passed benchmark testing and is migrated to the following AWS Deep Learning Containers (DLC):
+
+- PyTorch 1.11.0 DLC
+
+  .. code::
+
+    763104351884.dkr.ecr.<region>.amazonaws.com/pytorch-training:1.11.0-gpu-py38-cu113-ubuntu20.04-sagemaker
+
+Binary file of this version of the library for custom container users:
+
+  .. code::
+
+    https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.11.0/build-artifacts/2022-04-20-17-05/smdistributed_modelparallel-1.9.0-cp38-cp38-linux_x86_64.whl
+
+
+
+----
+
+Release History
+===============
+
+SageMaker Distributed Model Parallel 1.8.1 Release Notes
+--------------------------------------------------------
+
 *Date: April. 23. 2022*
 
 **New Features**
@@ -59,11 +91,6 @@ This version passed benchmark testing and is migrated to the following AWS Deep
       https://sagemaker-distributed-model-parallel.s3.us-west-2.amazonaws.com/pytorch-1.10.0/build-artifacts/2022-04-14-03-58/smdistributed_modelparallel-1.8.1-cp38-cp38-linux_x86_64.whl
 
 
-----
-
-Release History
-===============
-
 SageMaker Distributed Model Parallel 1.8.0 Release Notes
 --------------------------------------------------------
 
@@ -91,7 +118,7 @@ This version passed benchmark testing and is migrated to the following AWS Deep
       763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04
 
 
-* The binary file of this version of the library for custom container users
+The binary file of this version of the library for custom container users:
 
     .. code::
 
diff --git a/doc/api/training/smp_versions/latest.rst b/doc/api/training/smp_versions/latest.rst
@@ -10,8 +10,8 @@ depending on which version of the library you need to use.
 To use the library, reference the
 **Common API** documentation alongside the framework specific API documentation.
 
-Version 1.7.0, 1.8.0, 1.8.1 (Latest)
-====================================
+Version 1.7.0, 1.8.0, 1.8.1, 1.9.0 (Latest)
+===========================================
 
 To use the library, reference the Common API documentation alongside the framework specific API documentation.
 
diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py
@@ -16,6 +16,7 @@
 import logging
 import os
 import re
+import time
 import shutil
 import tempfile
 from collections import namedtuple
@@ -24,6 +25,7 @@
 import sagemaker.image_uris
 from sagemaker.session_settings import SessionSettings
 import sagemaker.utils
+from sagemaker.workflow import is_pipeline_variable
 
 from sagemaker.deprecations import renamed_warning
 
@@ -395,8 +397,10 @@ def model_code_key_prefix(code_location_key_prefix, model_name, image):
     Returns:
         str: the key prefix to be used in uploading code
     """
-    training_job_name = sagemaker.utils.name_from_image(image)
-    return "/".join(filter(None, [code_location_key_prefix, model_name or training_job_name]))
+    name_from_image = f"/model_code/{int(time.time())}"
+    if not is_pipeline_variable(image):
+        name_from_image = sagemaker.utils.name_from_image(image)
+    return "/".join(filter(None, [code_location_key_prefix, model_name or name_from_image]))
 
 
 def warn_if_parameter_server_with_multi_gpu(training_instance_type, distribution):
diff --git a/tests/unit/sagemaker/workflow/test_model_step.py b/tests/unit/sagemaker/workflow/test_model_step.py
@@ -46,6 +46,8 @@
     SageMakerJobStepRetryPolicy,
 )
 from sagemaker.xgboost import XGBoostModel
+from sagemaker.lambda_helper import Lambda
+from sagemaker.workflow.lambda_step import LambdaStep, LambdaOutput, LambdaOutputTypeEnum
 from tests.unit import DATA_DIR
 from tests.unit.sagemaker.workflow.helpers import CustomStep
 
@@ -844,3 +846,44 @@ def _verify_register_model_container_definition(
     if submit_dir and not submit_dir.startswith("s3://"):
         # exclude the s3 path assertion as it contains timestamp
         assert submit_dir == expected_submit_dir
+
+
+def test_model_step_with_lambda_property_reference(pipeline_session):
+    lambda_step = LambdaStep(
+        name="MyLambda",
+        lambda_func=Lambda(
+            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda"
+        ),
+        outputs=[
+            LambdaOutput(output_name="model_image", output_type=LambdaOutputTypeEnum.String),
+            LambdaOutput(output_name="model_artifact", output_type=LambdaOutputTypeEnum.String),
+        ],
+    )
+
+    model = PyTorchModel(
+        name="MyModel",
+        framework_version="1.8.0",
+        py_version="py3",
+        image_uri=lambda_step.properties.Outputs["model_image"],
+        model_data=lambda_step.properties.Outputs["model_artifact"],
+        sagemaker_session=pipeline_session,
+        entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
+        role=_ROLE,
+    )
+
+    step_create_model = ModelStep(name="mymodelstep", step_args=model.create())
+
+    pipeline = Pipeline(
+        name="MyPipeline",
+        steps=[lambda_step, step_create_model],
+        sagemaker_session=pipeline_session,
+    )
+    steps = json.loads(pipeline.definition())["Steps"]
+    repack_step = steps[1]
+    assert repack_step["Arguments"]["InputDataConfig"][0]["DataSource"]["S3DataSource"][
+        "S3Uri"
+    ] == {"Get": "Steps.MyLambda.OutputParameters['model_artifact']"}
+    register_step = steps[2]
+    assert register_step["Arguments"]["PrimaryContainer"]["Image"] == {
+        "Get": "Steps.MyLambda.OutputParameters['model_image']"
+    }