feat: swap FrameworkProcessor bash shell to sh

athewsey · athewsey · commit 051c5023621a · 2021-09-21T12:57:46.000+08:00
Some lightweight, container-oriented Linux distributions (e.g. Alpine)
may not include bash by default. The POSIX-compliant /bin/sh shell is
more portable and places a less stringent requirement on custom
containers used with FrameworkProcessor. The generated entrypoint
script now uses the POSIX `[ ... ]` test instead of the bash-only
`[[ ... ]]` form. Also expanded the scope of the tests.
diff --git a/src/sagemaker/processing.py b/src/sagemaker/processing.py
@@ -1318,7 +1318,7 @@ class FeatureStoreOutput(ApiObject):
 class FrameworkProcessor(ScriptProcessor):
     """Handles Amazon SageMaker processing tasks for jobs using a machine learning framework."""
 
-    framework_entrypoint_command = ["/bin/bash"]
+    framework_entrypoint_command = ["/bin/sh"]
 
     # Added new (kw)args for estimator. The rest are from ScriptProcessor with same defaults.
     def __init__(
@@ -1811,15 +1811,15 @@ def _generate_framework_script(self, user_script: str) -> str:
         """
         return dedent(
             """\
-            #!/bin/bash
+            #!/bin/sh
 
             cd /opt/ml/processing/input/code/
             tar -xzf sourcedir.tar.gz
 
             # Exit on any error. SageMaker uses error code to mark failed job.
             set -e
 
-            if [[ -f 'requirements.txt' ]]; then
+            if [ -f 'requirements.txt' ]; then
                 # Some py3 containers has typing, which may breaks pip install
                 pip uninstall --yes typing
 
diff --git a/tests/data/dummy_code_bundle_no_reqs/local_module.py b/tests/data/dummy_code_bundle_no_reqs/local_module.py
@@ -0,0 +1,2 @@
+"""A dummy Python module to check importing local files works OK"""
+DUMMY_CONSTANT = 1
diff --git a/tests/data/dummy_code_bundle_no_reqs/main_script.py b/tests/data/dummy_code_bundle_no_reqs/main_script.py
@@ -0,0 +1,8 @@
+"""A dummy SageMaker job script testing local imports"""
+
+print("This is the print output from dummy_code_bundle_no_reqs/main_script.py")
+
+print("Trying to import local module...")
+import local_module
+
+print("Done")
diff --git a/tests/integ/test_local_mode.py b/tests/integ/test_local_mode.py
@@ -353,7 +353,7 @@ def test_local_processing_sklearn(sagemaker_local_session_no_local_code, sklearn
     assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1
     assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == "local"
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
+        "/bin/sh",
         "/opt/ml/processing/input/entrypoint/runproc.sh",
     ]
     assert job_description["RoleArn"] == "<no_role>"
diff --git a/tests/integ/test_processing.py b/tests/integ/test_processing.py
@@ -146,7 +146,7 @@ def test_sklearn(sagemaker_session, sklearn_latest_version, cpu_instance_type):
     assert job_description["ProcessingResources"]["ClusterConfig"]["VolumeSizeInGB"] == 30
     assert job_description["StoppingCondition"] == {"MaxRuntimeInSeconds": 86400}
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
+        "/bin/sh",
         "/opt/ml/processing/input/entrypoint/runproc.sh",
     ]
     assert ROLE in job_description["RoleArn"]
@@ -157,6 +157,7 @@ def test_sklearn_with_customizations(
     sagemaker_session, image_uri, sklearn_latest_version, cpu_instance_type, output_kms_key
 ):
     input_file_path = os.path.join(DATA_DIR, "dummy_input.txt")
+    code_bundle_path = os.path.join(DATA_DIR, "dummy_code_bundle_with_reqs")
 
     sklearn_processor = SKLearnProcessor(
         framework_version=sklearn_latest_version,
@@ -175,7 +176,8 @@ def test_sklearn_with_customizations(
     )
 
     sklearn_processor.run(
-        code=os.path.join(DATA_DIR, "dummy_script.py"),
+        code="main_script.py",
+        source_dir=code_bundle_path,
         inputs=[
             ProcessingInput(
                 source=input_file_path,
@@ -221,7 +223,7 @@ def test_sklearn_with_customizations(
 
     assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
+        "/bin/sh",
         "/opt/ml/processing/input/entrypoint/runproc.sh",
     ]
     assert job_description["AppSpecification"]["ImageUri"] == image_uri
@@ -309,7 +311,7 @@ def test_sklearn_with_custom_default_bucket(
 
     assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
+        "/bin/sh",
         "/opt/ml/processing/input/entrypoint/runproc.sh",
     ]
     assert job_description["AppSpecification"]["ImageUri"] == image_uri
@@ -362,7 +364,7 @@ def test_sklearn_with_no_inputs_or_outputs(
 
     assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
     assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
-        "/bin/bash",
+        "/bin/sh",
         "/opt/ml/processing/input/entrypoint/runproc.sh",
     ]
     assert job_description["AppSpecification"]["ImageUri"] == image_uri
@@ -452,6 +454,80 @@ def test_script_processor(sagemaker_session, image_uri, cpu_instance_type, outpu
     assert job_description["StoppingCondition"] == {"MaxRuntimeInSeconds": 3600}
 
 
+@pytest.mark.release
+def test_script_processor_with_source_dir(
+    sagemaker_session, image_uri, cpu_instance_type, output_kms_key
+):
+    """Run a ScriptProcessor job from a source_dir bundle and check the job description."""
+    input_file_path = os.path.join(DATA_DIR, "dummy_input.txt")
+    source_dir = os.path.join(DATA_DIR, "dummy_code_bundle_no_reqs")
+
+    script_processor = ScriptProcessor(
+        role=ROLE,
+        image_uri=image_uri,
+        command=["python3"],
+        instance_count=1,
+        instance_type=cpu_instance_type,
+        volume_kms_key=None,
+        output_kms_key=output_kms_key,
+        max_runtime_in_seconds=600,
+        base_job_name="test-script-processor",
+        sagemaker_session=sagemaker_session,
+    )
+
+    script_processor.run(
+        code="main_script.py",
+        source_dir=source_dir,
+        inputs=[
+            ProcessingInput(
+                source=input_file_path,
+                destination="/opt/ml/processing/input/container/path/",
+                input_name="dummy_input",
+                s3_data_type="S3Prefix",
+                s3_input_mode="File",
+                s3_data_distribution_type="FullyReplicated",
+                s3_compression_type="None",
+            )
+        ],
+        outputs=[
+            ProcessingOutput(
+                source="/opt/ml/processing/output/container/path/",
+                output_name="dummy_output",
+                s3_upload_mode="EndOfJob",
+            )
+        ],
+        arguments=["-v"],
+        wait=True,
+        logs=True,
+    )
+
+    job_description = script_processor.latest_job.describe()
+
+    assert job_description["ProcessingInputs"][0]["InputName"] == "dummy_input"
+    assert job_description["ProcessingInputs"][1]["InputName"] == "code"
+    assert job_description["ProcessingJobName"].startswith("test-script-processor")
+    assert job_description["ProcessingJobStatus"] == "Completed"
+
+    assert job_description["ProcessingOutputConfig"]["KmsKeyId"] == output_kms_key
+    assert job_description["ProcessingOutputConfig"]["Outputs"][0]["OutputName"] == "dummy_output"
+
+    assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1
+    assert (
+        job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == cpu_instance_type
+    )
+
+    assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
+    assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
+        "python3",
+        "/opt/ml/processing/input/code/main_script.py",
+    ]
+    assert job_description["AppSpecification"]["ImageUri"] == image_uri
+
+    assert ROLE in job_description["RoleArn"]
+
+    assert job_description["StoppingCondition"] == {"MaxRuntimeInSeconds": 600}
+
+
 def test_script_processor_with_no_inputs_or_outputs(
     sagemaker_session, image_uri, cpu_instance_type
 ):
@@ -680,7 +756,7 @@ def test_processor_with_custom_bucket(
 
 
 def test_sklearn_with_network_config(sagemaker_session, sklearn_latest_version, cpu_instance_type):
-    script_path = os.path.join(DATA_DIR, "dummy_script.py")
+    code_bundle_path = os.path.join(DATA_DIR, "dummy_code_bundle_no_reqs")
     input_file_path = os.path.join(DATA_DIR, "dummy_input.txt")
 
     sklearn_processor = SKLearnProcessor(
@@ -697,13 +773,17 @@ def test_sklearn_with_network_config(sagemaker_session, sklearn_latest_version,
     )
 
     sklearn_processor.run(
-        code=script_path,
+        code="main_script.py",
+        source_dir=code_bundle_path,
         inputs=[ProcessingInput(source=input_file_path, destination="/opt/ml/processing/inputs/")],
-        wait=False,
+        wait=True,
         logs=False,
     )
 
     job_description = sklearn_processor.latest_job.describe()
+
+    assert job_description["ProcessingJobStatus"] == "Completed"
+
     network_config = job_description["NetworkConfig"]
     assert network_config["EnableInterContainerTrafficEncryption"]
     assert network_config["EnableNetworkIsolation"]
diff --git a/tests/unit/sagemaker/huggingface/test_processing.py b/tests/unit/sagemaker/huggingface/test_processing.py
@@ -134,7 +134,7 @@ def _get_expected_args_modular_code(job_name, code_s3_uri=f"s3://{BUCKET_NAME}")
         "app_specification": {
             "ImageUri": CUSTOM_IMAGE_URI,
             "ContainerEntrypoint": [
-                "/bin/bash",
+                "/bin/sh",
                 "/opt/ml/processing/input/entrypoint/runproc.sh",
             ],
         },
diff --git a/tests/unit/test_processing.py b/tests/unit/test_processing.py
@@ -271,6 +271,7 @@ def test_sklearn_with_all_parameters_via_run_args(
 
         processor.run(
             code=run_args.code,
+            source_dir="/local/path/to/source_dir",
             inputs=run_args.inputs,
             outputs=run_args.outputs,
             arguments=run_args.arguments,
@@ -345,6 +346,7 @@ def test_sklearn_with_all_parameters_via_run_args_called_twice(
 
         processor.run(
             code=run_args.code,
+            source_dir="/local/path/to/source_dir",
             inputs=run_args.inputs,
             outputs=run_args.outputs,
             arguments=run_args.arguments,
@@ -974,7 +976,7 @@ def _get_expected_args_modular_code(job_name, code_s3_uri=f"s3://{BUCKET_NAME}")
         "app_specification": {
             "ImageUri": CUSTOM_IMAGE_URI,
             "ContainerEntrypoint": [
-                "/bin/bash",
+                "/bin/sh",
                 "/opt/ml/processing/input/entrypoint/runproc.sh",
             ],
         },
@@ -1215,7 +1217,7 @@ def _get_expected_args_all_parameters_modular_code(
             "ImageUri": "012345678901.dkr.ecr.us-west-2.amazonaws.com/my-custom-image-uri",
             "ContainerArguments": ["--drop-columns", "'SelfEmployed'"],
             "ContainerEntrypoint": [
-                "/bin/bash",
+                "/bin/sh",
                 "/opt/ml/processing/input/entrypoint/runproc.sh",
             ],
         },

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+"""A dummy Python module to check importing local files works OK"""`
	`2`	`+DUMMY_CONSTANT = 1`
Original file line number	Diff line number	Diff line change
`@@ -353,7 +353,7 @@ def test_local_processing_sklearn(sagemaker_local_session_no_local_code, sklearn`
`353`	`353`	`assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1`
`354`	`354`	`assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == "local"`
`355`	`355`	`assert job_description["AppSpecification"]["ContainerEntrypoint"] == [`
`356`		`- "/bin/bash",`
	`356`	`+ "/bin/sh",`
`357`	`357`	`"/opt/ml/processing/input/entrypoint/runproc.sh",`
`358`	`358`	`]`
`359`	`359`	`assert job_description["RoleArn"] == "<no_role>"`