Merge branch 'master' into doc_inputs

shreyapandit · web-flow · commit 0c28310a9d0b · 2021-08-06T16:11:50.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## v2.52.1 (2021-08-06)
+
+### Bug Fixes and Other Changes
+
+ * revert #2251 changes for sklearn processor
+
 ## v2.52.0 (2021-08-05)
 
 ### Features
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-2.52.1.dev0
+2.52.2.dev0
diff --git a/doc/api/inference/model.rst b/doc/api/inference/model.rst
@@ -15,3 +15,8 @@ Model
     :members:
     :undoc-members:
     :show-inheritance:
+
+.. autoclass:: sagemaker.serverless.model.LambdaModel
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/doc/api/inference/predictors.rst b/doc/api/inference/predictors.rst
@@ -7,3 +7,8 @@ Make real-time predictions against SageMaker endpoints with Python objects
     :members:
     :undoc-members:
     :show-inheritance:
+
+.. autoclass:: sagemaker.serverless.predictor.LambdaPredictor
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/doc/overview.rst b/doc/overview.rst
@@ -1063,6 +1063,50 @@ You can also find these notebooks in the **Advanced Functionality** section of t
 For information about using sample notebooks in a SageMaker notebook instance, see `Use Example Notebooks <https://docs.aws.amazon.com/sagemaker/latest/dg/howitworks-nbexamples.html>`__
 in the AWS documentation.
 
+********************
+Serverless Inference
+********************
+
+You can use the SageMaker Python SDK to perform serverless inference on Lambda.
+
+To deploy models to Lambda, you must complete the following prerequisites:
+
+- `Package your model and inference code as a container image. <https://docs.aws.amazon.com/lambda/latest/dg/images-create.html>`_
+- `Create a role that lists Lambda as a trusted entity. <https://docs.aws.amazon.com/lambda/latest/dg/lambda-intro-execution-role.html#permissions-executionrole-console>`_
+
+After completing the prerequisites, you can deploy your model to Lambda using
+the `LambdaModel`_ class.
+
+.. code:: python
+
+   from sagemaker.serverless import LambdaModel
+
+   image_uri = "123456789012.dkr.ecr.us-west-2.amazonaws.com/my-lambda-repository:latest"
+   role = "arn:aws:iam::123456789012:role/MyLambdaExecutionRole"
+
+   model = LambdaModel(image_uri=image_uri, role=role)
+   predictor = model.deploy("my-lambda-function", timeout=20, memory_size=4092)
+
+The ``deploy`` method returns a `LambdaPredictor`_ instance. Use the
+`LambdaPredictor`_ ``predict`` method to perform inference on Lambda.
+
+.. code:: python
+
+   url = "https://example.com/cat.jpeg"
+   predictor.predict({"url": url})  # {'class': 'tabby'}
+
+Once you are done performing inference on Lambda, free the `LambdaModel`_ and
+`LambdaPredictor`_ resources using the ``delete_model`` and ``delete_predictor``
+methods.
+
+.. code:: python
+
+   model.delete_model()
+   predictor.delete_predictor()
+
+.. _LambdaModel : https://sagemaker.readthedocs.io/en/stable/api/inference/model.html#sagemaker.serverless.model.LambdaModel
+.. _LambdaPredictor : https://sagemaker.readthedocs.io/en/stable/api/inference/predictors.html#sagemaker.serverless.predictor.LambdaPredictor
+
 ******************
 SageMaker Workflow
 ******************
diff --git a/src/sagemaker/sklearn/processing.py b/src/sagemaker/sklearn/processing.py
@@ -17,67 +17,93 @@
 """
 from __future__ import absolute_import
 
-from sagemaker.processing import FrameworkProcessor
-from sagemaker.sklearn.estimator import SKLearn
+from sagemaker import image_uris, Session
+from sagemaker.processing import ScriptProcessor
+from sagemaker.sklearn import defaults
 
 
-class SKLearnProcessor(FrameworkProcessor):
-    """Initialize an ``SKLearnProcessor`` instance.
-
-    The SKLearnProcessor handles Amazon SageMaker processing tasks for jobs using scikit-learn.
-
-    Unless ``image_uri`` is specified, the scikit-learn environment is an
-    Amazon-built Docker container that executes functions defined in the supplied
-    ``code`` Python script.
-
-    The arguments have the exact same meaning as in ``FrameworkProcessor``.
-
-    .. tip::
-
-        You can find additional parameters for initializing this class at
-        :class:`~sagemaker.processing.FrameworkProcessor`.
-    """
-
-    estimator_cls = SKLearn
+class SKLearnProcessor(ScriptProcessor):
+    """Handles Amazon SageMaker processing tasks for jobs using scikit-learn."""
 
     def __init__(
         self,
-        framework_version,  # New arg
+        framework_version,
         role,
-        instance_count,
         instance_type,
-        py_version="py3",  # New kwarg
-        image_uri=None,
+        instance_count,
         command=None,
         volume_size_in_gb=30,
         volume_kms_key=None,
         output_kms_key=None,
-        code_location=None,  # New arg
         max_runtime_in_seconds=None,
         base_job_name=None,
         sagemaker_session=None,
         env=None,
         tags=None,
         network_config=None,
     ):
-        """This processor executes a Python script in a scikit-learn execution environment."""
-        super().__init__(
-            self.estimator_cls,
-            framework_version,
-            role,
-            instance_count,
-            instance_type,
-            py_version,
-            image_uri,
-            command,
-            volume_size_in_gb,
-            volume_kms_key,
-            output_kms_key,
-            code_location,
-            max_runtime_in_seconds,
-            base_job_name,
-            sagemaker_session,
-            env,
-            tags,
-            network_config,
+        """Initialize an ``SKLearnProcessor`` instance.
+
+        The SKLearnProcessor handles Amazon SageMaker processing tasks for jobs using scikit-learn.
+
+        Args:
+            framework_version (str): The version of scikit-learn.
+            role (str): An AWS IAM role name or ARN. The Amazon SageMaker training jobs
+                and APIs that create Amazon SageMaker endpoints use this role
+                to access training data and model artifacts. After the endpoint
+                is created, the inference code might use the IAM role, if it
+                needs to access an AWS resource.
+            instance_type (str): Type of EC2 instance to use for
+                processing, for example, 'ml.c4.xlarge'.
+            instance_count (int): The number of instances to run
+                the Processing job with.
+            command ([str]): The command to run, along with any command-line flags.
+                Example: ["python3", "-v"]. If not provided, the command
+                defaults to ["python3"].
+            volume_size_in_gb (int): Size in GB of the EBS volume to
+                use for storing data during processing (default: 30).
+            volume_kms_key (str): A KMS key for the processing
+                volume.
+            output_kms_key (str): The KMS key id for all ProcessingOutputs.
+            max_runtime_in_seconds (int): Timeout in seconds.
+                After this amount of time Amazon SageMaker terminates the job
+                regardless of its current status.
+            base_job_name (str): Prefix for the processing job name. If not specified,
+                the processor generates a default job name, based on the
+                processing image name and current timestamp.
+            sagemaker_session (sagemaker.session.Session): Session object which
+                manages interactions with Amazon SageMaker APIs and any other
+                AWS services needed. If not specified, the processor creates one
+                using the default AWS configuration chain.
+            env (dict): Environment variables to be passed to the processing job.
+            tags ([dict]): List of tags to be passed to the processing job.
+            network_config (sagemaker.network.NetworkConfig): A NetworkConfig
+                object that configures network isolation, encryption of
+                inter-container traffic, security group IDs, and subnets.
+        """
+        if not command:
+            command = ["python3"]
+
+        session = sagemaker_session or Session()
+        region = session.boto_region_name
+
+        image_uri = image_uris.retrieve(
+            defaults.SKLEARN_NAME, region, version=framework_version, instance_type=instance_type
+        )
+
+        super(SKLearnProcessor, self).__init__(
+            role=role,
+            image_uri=image_uri,
+            instance_count=instance_count,
+            instance_type=instance_type,
+            command=command,
+            volume_size_in_gb=volume_size_in_gb,
+            volume_kms_key=volume_kms_key,
+            output_kms_key=output_kms_key,
+            max_runtime_in_seconds=max_runtime_in_seconds,
+            base_job_name=base_job_name,
+            sagemaker_session=session,
+            env=env,
+            tags=tags,
+            network_config=network_config,
         )
diff --git a/tests/integ/test_local_mode.py b/tests/integ/test_local_mode.py
@@ -349,12 +349,12 @@ def test_local_processing_sklearn(sagemaker_local_session_no_local_code, sklearn
 
     job_description = sklearn_processor.latest_job.describe()
 
-    assert len(job_description["ProcessingInputs"]) == 3
+    assert len(job_description["ProcessingInputs"]) == 2
     assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1
     assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == "local"
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
-        "/opt/ml/processing/input/entrypoint/runproc.sh",
+        "python3",
+        "/opt/ml/processing/input/code/dummy_script.py",
     ]
     assert job_description["RoleArn"] == "<no_role>"
 
diff --git a/tests/integ/test_processing.py b/tests/integ/test_processing.py
@@ -125,6 +125,7 @@ def test_sklearn(sagemaker_session, sklearn_latest_version, cpu_instance_type):
         role=ROLE,
         instance_type=cpu_instance_type,
         instance_count=1,
+        command=["python3"],
         sagemaker_session=sagemaker_session,
         base_job_name="test-sklearn",
     )
@@ -138,16 +139,16 @@ def test_sklearn(sagemaker_session, sklearn_latest_version, cpu_instance_type):
 
     job_description = sklearn_processor.latest_job.describe()
 
-    assert len(job_description["ProcessingInputs"]) == 3
+    assert len(job_description["ProcessingInputs"]) == 2
     assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1
     assert (
         job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == cpu_instance_type
     )
     assert job_description["ProcessingResources"]["ClusterConfig"]["VolumeSizeInGB"] == 30
     assert job_description["StoppingCondition"] == {"MaxRuntimeInSeconds": 86400}
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
-        "/opt/ml/processing/input/entrypoint/runproc.sh",
+        "python3",
+        "/opt/ml/processing/input/code/dummy_script.py",
     ]
     assert ROLE in job_description["RoleArn"]
 
@@ -203,7 +204,6 @@ def test_sklearn_with_customizations(
     assert job_description["ProcessingInputs"][0]["InputName"] == "dummy_input"
 
     assert job_description["ProcessingInputs"][1]["InputName"] == "code"
-    assert job_description["ProcessingInputs"][2]["InputName"] == "entrypoint"
 
     assert job_description["ProcessingJobName"].startswith("test-sklearn-with-customizations")
 
@@ -220,8 +220,8 @@ def test_sklearn_with_customizations(
 
     assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
-        "/opt/ml/processing/input/entrypoint/runproc.sh",
+        "python3",
+        "/opt/ml/processing/input/code/dummy_script.py",
     ]
     assert job_description["AppSpecification"]["ImageUri"] == image_uri
 
@@ -245,6 +245,7 @@ def test_sklearn_with_custom_default_bucket(
     sklearn_processor = SKLearnProcessor(
         framework_version=sklearn_latest_version,
         role=ROLE,
+        command=["python3"],
         instance_type=cpu_instance_type,
         instance_count=1,
         volume_size_in_gb=100,
@@ -287,9 +288,6 @@ def test_sklearn_with_custom_default_bucket(
     assert job_description["ProcessingInputs"][0]["InputName"] == "dummy_input"
     assert custom_bucket_name in job_description["ProcessingInputs"][0]["S3Input"]["S3Uri"]
 
-    assert job_description["ProcessingInputs"][1]["InputName"] == "code"
-    assert custom_bucket_name in job_description["ProcessingInputs"][1]["S3Input"]["S3Uri"]
-
     assert job_description["ProcessingInputs"][2]["InputName"] == "entrypoint"
     assert custom_bucket_name in job_description["ProcessingInputs"][2]["S3Input"]["S3Uri"]
 
@@ -308,8 +306,8 @@ def test_sklearn_with_custom_default_bucket(
 
     assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
-        "/opt/ml/processing/input/entrypoint/runproc.sh",
+        "python3",
+        "/opt/ml/processing/input/code/dummy_script.py",
     ]
     assert job_description["AppSpecification"]["ImageUri"] == image_uri
 
@@ -326,6 +324,7 @@ def test_sklearn_with_no_inputs_or_outputs(
     sklearn_processor = SKLearnProcessor(
         framework_version=sklearn_latest_version,
         role=ROLE,
+        command=["python3"],
         instance_type=cpu_instance_type,
         instance_count=1,
         volume_size_in_gb=100,
@@ -338,16 +337,12 @@ def test_sklearn_with_no_inputs_or_outputs(
     )
 
     sklearn_processor.run(
-        code=os.path.join(DATA_DIR, "dummy_script.py"),
-        arguments=["-v"],
-        wait=True,
-        logs=True,
+        code=os.path.join(DATA_DIR, "dummy_script.py"), arguments=["-v"], wait=True, logs=True
     )
 
     job_description = sklearn_processor.latest_job.describe()
 
     assert job_description["ProcessingInputs"][0]["InputName"] == "code"
-    assert job_description["ProcessingInputs"][1]["InputName"] == "entrypoint"
 
     assert job_description["ProcessingJobName"].startswith("test-sklearn-with-no-inputs")
 
@@ -361,8 +356,8 @@ def test_sklearn_with_no_inputs_or_outputs(
 
     assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
-        "/opt/ml/processing/input/entrypoint/runproc.sh",
+        "python3",
+        "/opt/ml/processing/input/code/dummy_script.py",
     ]
     assert job_description["AppSpecification"]["ImageUri"] == image_uri
 
diff --git a/tests/integ/test_sklearn.py b/tests/integ/test_sklearn.py
@@ -46,20 +46,6 @@ def sklearn_training_job(
     sagemaker_session.boto_region_name
 
 
-def test_framework_processing_job_with_deps(
-    sagemaker_session,
-    sklearn_latest_version,
-    sklearn_latest_py_version,
-    cpu_instance_type,
-):
-    return _run_processing_job(
-        sagemaker_session,
-        cpu_instance_type,
-        sklearn_latest_version,
-        sklearn_latest_py_version,
-    )
-
-
 def test_training_with_additional_hyperparameters(
     sagemaker_session,
     sklearn_latest_version,
diff --git a/tests/unit/test_processing.py b/tests/unit/test_processing.py