Fix request dict conversion error in RegisterModel step with model repacking; add unit and integ tests for it

jerrypeng7773 · jerrypeng7773 · commit 23201d9a6333 · 2021-03-31T16:51:33.000-07:00
diff --git a/src/sagemaker/workflow/step_collections.py b/src/sagemaker/workflow/step_collections.py
@@ -109,6 +109,11 @@ def __init__(
             steps.append(repack_model_step)
             model_data = repack_model_step.properties.ModelArtifacts.S3ModelArtifacts
 
+        # remove kwargs consumed by model repacking step
+        kwargs.pop("entry_point", None)
+        kwargs.pop("source_dir", None)
+        kwargs.pop("dependencies", None)
+
         register_model_step = _RegisterModelStep(
             name=name,
             estimator=estimator,
diff --git a/tests/integ/test_workflow.py b/tests/integ/test_workflow.py
@@ -776,6 +776,106 @@ def test_conditional_pytorch_training_model_registration(
             pass
 
 
+def test_model_registration_with_model_repack(  # integ: pipeline with RegisterModel + entry_point
+    sagemaker_session,
+    role,
+    pipeline_name,
+    region_name,
+):
+    base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
+    entry_point = os.path.join(base_dir, "mnist.py")
+    input_path = sagemaker_session.upload_data(  # result is passed as s3_data below
+        path=os.path.join(base_dir, "training"),
+        key_prefix="integ-test-data/pytorch_mnist/training",
+    )
+    inputs = TrainingInput(s3_data=input_path)
+
+    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)  # see step_cond
+
+    pytorch_estimator = PyTorch(
+        entry_point=entry_point,
+        role=role,
+        framework_version="1.5.0",
+        py_version="py3",
+        instance_count=instance_count,
+        instance_type=instance_type,
+        sagemaker_session=sagemaker_session,
+    )
+    step_train = TrainingStep(
+        name="pytorch-train",
+        estimator=pytorch_estimator,
+        inputs=inputs,
+    )
+
+    step_register = RegisterModel(
+        name="pytorch-register-model",
+        estimator=pytorch_estimator,
+        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
+        content_types=["*"],
+        response_types=["*"],
+        inference_instances=["*"],
+        transform_instances=["*"],
+        description="test-description",
+        entry_point=entry_point,  # kwarg consumed by the model repacking step
+    )
+
+    model = Model(
+        image_uri=pytorch_estimator.training_image_uri(),
+        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
+        sagemaker_session=sagemaker_session,
+        role=role,
+    )
+    model_inputs = CreateModelInput(
+        instance_type="ml.m5.large",
+        accelerator_type="ml.eia1.medium",
+    )
+    step_model = CreateModelStep(
+        name="pytorch-model",
+        model=model,
+        inputs=model_inputs,
+    )
+
+    step_cond = ConditionStep(
+        name="cond-good-enough",
+        conditions=[ConditionGreaterThanOrEqualTo(left=good_enough_input, right=1)],
+        if_steps=[step_train, step_register],  # train + register (with repack)
+        else_steps=[step_model],  # create-model only
+    )
+
+    pipeline = Pipeline(
+        name=pipeline_name,
+        parameters=[good_enough_input, instance_count, instance_type],
+        steps=[step_cond],
+        sagemaker_session=sagemaker_session,
+    )
+
+    try:
+        response = pipeline.create(role)
+        create_arn = response["PipelineArn"]
+        assert re.match(
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}", create_arn
+        )
+
+        execution = pipeline.start(parameters={})  # no overrides: GoodEnoughInput stays at 1
+        assert re.match(
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
+            execution.arn,
+        )
+
+        execution = pipeline.start(parameters={"GoodEnoughInput": 0})  # 0 fails the >= 1 check
+        assert re.match(
+            fr"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
+            execution.arn,
+        )
+    finally:  # always attempt to delete the pipeline, whether or not the assertions passed
+        try:
+            pipeline.delete()
+        except Exception:  # best-effort cleanup: deletion errors are deliberately ignored
+            pass
+
+
 def test_training_job_with_debugger_and_profiler(
     sagemaker_session,
     pipeline_name,
diff --git a/tests/unit/sagemaker/workflow/test_step_collections.py b/tests/unit/sagemaker/workflow/test_step_collections.py
@@ -14,6 +14,7 @@
 from __future__ import absolute_import
 
 import pytest
+from tests.unit import DATA_DIR
 
 import sagemaker
 
@@ -38,13 +39,17 @@
     StepCollection,
     RegisterModel,
 )
+from sagemaker.workflow.pipeline import Pipeline
 from tests.unit.sagemaker.workflow.helpers import ordered
 
 REGION = "us-west-2"
 BUCKET = "my-bucket"
 IMAGE_URI = "fakeimage"
 ROLE = "DummyRole"
 MODEL_NAME = "gisele"
+MODEL_REPACKING_IMAGE_URI = (  # TrainingImage expected for the model repacking step
+    "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3"
+)
 
 
 class CustomStep(Step):
@@ -177,6 +182,111 @@ def test_register_model(estimator, model_metrics):
     )
 
 
+def test_register_model_with_model_repack(estimator, model_metrics):
+    """Check both request dicts RegisterModel emits when given an entry_point."""
+    model_data = f"s3://{BUCKET}/model.tar.gz"
+    register_model = RegisterModel(
+        name="RegisterModelStep",
+        estimator=estimator,
+        model_data=model_data,
+        content_types=["content_type"],
+        response_types=["response_type"],
+        inference_instances=["inference_instance"],
+        transform_instances=["transform_instance"],
+        model_package_group_name="mpg",
+        model_metrics=model_metrics,
+        approval_status="Approved",
+        description="description",
+        entry_point=f"{DATA_DIR}/dummy_script.py",
+    )
+
+    request_dicts = register_model.request_dicts()
+    assert len(request_dicts) == 2  # one Training (repack) step + one RegisterModel step
+    for request_dict in request_dicts:
+        if request_dict["Type"] == "Training":
+            assert request_dict["Name"] == "RegisterModelStepRepackModel"
+            arguments = request_dict["Arguments"]
+            repacker_job_name = arguments["HyperParameters"]["sagemaker_job_name"]
+            assert ordered(arguments) == ordered(
+                {
+                    "AlgorithmSpecification": {
+                        "TrainingImage": MODEL_REPACKING_IMAGE_URI,
+                        "TrainingInputMode": "File",
+                    },
+                    "DebugHookConfig": {
+                        "CollectionConfigurations": [],
+                        "S3OutputPath": f"s3://{BUCKET}/",
+                    },
+                    "HyperParameters": {
+                        "inference_script": '"dummy_script.py"',
+                        "model_archive": '"model.tar.gz"',
+                        "sagemaker_submit_directory": '"s3://{}/{}/source/sourcedir.tar.gz"'.format(
+                            BUCKET, repacker_job_name.replace('"', "")
+                        ),
+                        "sagemaker_program": '"_repack_model.py"',
+                        "sagemaker_container_log_level": "20",
+                        "sagemaker_job_name": repacker_job_name,  # taken from the actual arguments
+                        "sagemaker_region": f'"{REGION}"',
+                    },
+                    "InputDataConfig": [
+                        {
+                            "ChannelName": "training",
+                            "DataSource": {
+                                "S3DataSource": {
+                                    "S3DataDistributionType": "FullyReplicated",
+                                    "S3DataType": "S3Prefix",
+                                    "S3Uri": f"s3://{BUCKET}",
+                                }
+                            },
+                        }
+                    ],
+                    "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"},
+                    "ResourceConfig": {
+                        "InstanceCount": 1,
+                        "InstanceType": "ml.m5.large",
+                        "VolumeSizeInGB": 30,
+                    },
+                    "RoleArn": ROLE,
+                    "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
+                }
+            )
+        elif request_dict["Type"] == "RegisterModel":
+            assert request_dict["Name"] == "RegisterModelStep"
+            arguments = request_dict["Arguments"]
+            assert len(arguments["InferenceSpecification"]["Containers"]) == 1
+            assert (
+                arguments["InferenceSpecification"]["Containers"][0]["Image"]
+                == estimator.training_image_uri()
+            )
+            assert isinstance(  # ModelDataUrl must be a step property, not the raw S3 URI
+                arguments["InferenceSpecification"]["Containers"][0]["ModelDataUrl"], Properties
+            )
+            del arguments["InferenceSpecification"]["Containers"]  # already asserted above
+            assert ordered(arguments) == ordered(
+                {
+                    "InferenceSpecification": {
+                        "SupportedContentTypes": ["content_type"],
+                        "SupportedRealtimeInferenceInstanceTypes": ["inference_instance"],
+                        "SupportedResponseMIMETypes": ["response_type"],
+                        "SupportedTransformInstanceTypes": ["transform_instance"],
+                    },
+                    "ModelApprovalStatus": "Approved",
+                    "ModelMetrics": {
+                        "ModelQuality": {
+                            "Statistics": {
+                                "ContentType": "text/csv",
+                                "S3Uri": f"s3://{BUCKET}/metrics.csv",
+                            },
+                        },
+                    },
+                    "ModelPackageDescription": "description",
+                    "ModelPackageGroupName": "mpg",
+                }
+            )
+        else:
+            raise Exception("A step exists in the collection of an invalid type.")
+
+
 def test_estimator_transformer(estimator):
     model_data = f"s3://{BUCKET}/model.tar.gz"
     model_inputs = CreateModelInput(