Added doc strings to entities.py, implemented integ tests and fixed two bugs.

EC2 Default User · EC2 Default User · commit a846b3210429 · 2020-12-17T19:27:47.000Z
diff --git a/src/sagemaker/local/entities.py b/src/sagemaker/local/entities.py
@@ -39,7 +39,7 @@
 
 
 class _LocalProcessingJob(object):
-    """Placeholder docstring"""
+    """Defines and starts a local processing job."""
 
     _STARTING = "Starting"
     _PROCESSING = "Processing"
@@ -49,7 +49,7 @@ class _LocalProcessingJob(object):
     def __init__(self, container):
         """
         Args:
-            container:
+            container: the local container object.
         """
         self.container = container
         self.state = "Created"
@@ -63,10 +63,10 @@ def __init__(self, container):
     def start(self, processing_inputs, processing_output_config, environment, processing_job_name):
         """
         Args:
-            processing_inputs:
-            processing_output_config:
-            environment:
-            processing_job_name:
+            processing_inputs: The processing input configuration.
+            processing_output_config: The processing output configuration.
+            environment: The collection of environment variables passed to the job.
+            processing_job_name: The processing job name.
         """
 
         for item in processing_inputs:
@@ -132,11 +132,12 @@ def start(self, processing_inputs, processing_output_config, environment, proces
         self.container.process(
             processing_inputs, processing_output_config, environment, processing_job_name
         )
+
         self.end_time = datetime.datetime.now()
         self.state = self._COMPLETED
 
     def describe(self):
-        """Placeholder docstring"""
+        """Describes a local processing job."""
 
         response = {
             "ProcessingJobArn": self.processing_job_name,
diff --git a/src/sagemaker/local/image.py b/src/sagemaker/local/image.py
@@ -143,9 +143,6 @@ def process(
                 host, environment, processing_inputs, processing_output_config, processing_job_name
             )
 
-        # Adding region name environment variable.
-        environment[REGION_ENV_NAME] = self.sagemaker_session.boto_region_name
-
         self._generate_compose_file(
             "process", additional_volumes=volumes, additional_env_vars=environment
         )
@@ -728,7 +725,7 @@ def _create_docker_host(self, host, environment, optml_subdirs, command, volumes
             if self.container_entrypoint:
                 host_config["entrypoint"] = self.container_entrypoint
             if self.container_arguments:
-                host_config["entrypoint"] += self.container_arguments
+                host_config["entrypoint"] = host_config["entrypoint"] + self.container_arguments
 
         # for GPU support pass in nvidia as the runtime, this is equivalent
         # to setting --runtime=nvidia in the docker commandline.
diff --git a/tests/integ/test_local_mode.py b/tests/integ/test_local_mode.py
@@ -25,6 +25,11 @@
 import tests.integ.lock as lock
 from tests.integ import DATA_DIR
 
+from sagemaker import image_uris
+
+from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor
+from sagemaker.sklearn.processing import SKLearnProcessor
+
 from sagemaker.local import LocalSession, LocalSagemakerRuntimeClient, LocalSagemakerClient
 from sagemaker.mxnet import MXNet
 
@@ -53,6 +58,22 @@ def _initialize(self, boto_session, sagemaker_client, sagemaker_runtime_client,
         self.local_mode = True
 
 
+@pytest.fixture(scope="module")
+def image_uri(
+    sklearn_latest_version,
+    sklearn_latest_py_version,
+    cpu_instance_type,
+    sagemaker_session,
+):
+    return image_uris.retrieve(
+        "sklearn",
+        sagemaker_session.boto_region_name,
+        version=sklearn_latest_version,
+        py_version=sklearn_latest_py_version,
+        instance_type=cpu_instance_type,
+    )
+
+
 @pytest.fixture(scope="module")
 def mxnet_model(
     sagemaker_local_session, mxnet_inference_latest_version, mxnet_inference_latest_py_version
@@ -298,3 +319,106 @@ def test_local_transform_mxnet(
         transformer.wait()
 
     assert os.path.exists(os.path.join(str(tmpdir), "data.csv.out"))
+
+
+@pytest.mark.local_mode
+def test_local_processing_sklearn(sagemaker_local_session, sklearn_latest_version):
+    script_path = os.path.join(DATA_DIR, "dummy_script.py")
+    input_file_path = os.path.join(DATA_DIR, "dummy_input.txt")
+
+    sklearn_processor = SKLearnProcessor(
+        framework_version=sklearn_latest_version,
+        role="SageMakerRole",
+        instance_type="local",
+        instance_count=1,
+        command=["python3"],
+        sagemaker_session=sagemaker_local_session,
+    )
+
+    sklearn_processor.run(
+        code=script_path,
+        inputs=[ProcessingInput(source=input_file_path, destination="/opt/ml/processing/inputs/")],
+        wait=False,
+        logs=False,
+    )
+
+    job_description = sklearn_processor.latest_job.describe()
+
+    assert len(job_description["ProcessingInputs"]) == 2
+    assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1
+    assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == "local"
+    assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
+        "python3",
+        "/opt/ml/processing/input/code/dummy_script.py",
+    ]
+    assert job_description["RoleArn"] == "<no_role>"
+
+
+@pytest.mark.local_mode
+def test_local_processing_script_processor(sagemaker_local_session, image_uri):
+    input_file_path = os.path.join(DATA_DIR, "dummy_input.txt")
+
+    script_processor = ScriptProcessor(
+        role="SageMakerRole",
+        image_uri=image_uri,
+        command=["python3"],
+        instance_count=1,
+        instance_type="local",
+        volume_size_in_gb=30,
+        volume_kms_key=None,
+        max_runtime_in_seconds=3600,
+        base_job_name="test-script-processor",
+        env={"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"},
+        tags=[{"Key": "dummy-tag", "Value": "dummy-tag-value"}],
+        sagemaker_session=sagemaker_local_session,
+    )
+
+    script_processor.run(
+        code=os.path.join(DATA_DIR, "dummy_script.py"),
+        inputs=[
+            ProcessingInput(
+                source=input_file_path,
+                destination="/opt/ml/processing/input/container/path/",
+                input_name="dummy_input",
+                s3_data_type="S3Prefix",
+                s3_input_mode="File",
+                s3_data_distribution_type="FullyReplicated",
+                s3_compression_type="None",
+            )
+        ],
+        outputs=[
+            ProcessingOutput(
+                source="/opt/ml/processing/output/container/path/",
+                output_name="dummy_output",
+                s3_upload_mode="EndOfJob",
+            )
+        ],
+        arguments=["-v"],
+        wait=True,
+        logs=True,
+    )
+
+    job_description = script_processor.latest_job.describe()
+
+    assert job_description["ProcessingInputs"][0]["InputName"] == "dummy_input"
+
+    assert job_description["ProcessingInputs"][1]["InputName"] == "code"
+
+    assert job_description["ProcessingJobName"].startswith("test-script-processor")
+
+    assert job_description["ProcessingJobStatus"] == "Completed"
+
+    assert job_description["ProcessingOutputConfig"]["Outputs"][0]["OutputName"] == "dummy_output"
+
+    assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1
+    assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == "local"
+    assert job_description["ProcessingResources"]["ClusterConfig"]["VolumeSizeInGB"] == 30
+
+    assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
+    assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
+        "python3",
+        "/opt/ml/processing/input/code/dummy_script.py",
+    ]
+    assert job_description["AppSpecification"]["ImageUri"] == image_uri
+
+    assert job_description["Environment"] == {"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"}