Skip to content

Commit a846b32

Browse files
author
EC2 Default User
committed
Added doc strings to entities.py, implemented integ tests and fixed two bugs.
1 parent 520c101 commit a846b32

File tree

3 files changed

+133
-11
lines changed

3 files changed

+133
-11
lines changed

src/sagemaker/local/entities.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939

4040

4141
class _LocalProcessingJob(object):
42-
"""Placeholder docstring"""
42+
"""Defines and starts a local processing job."""
4343

4444
_STARTING = "Starting"
4545
_PROCESSING = "Processing"
@@ -49,7 +49,7 @@ class _LocalProcessingJob(object):
4949
def __init__(self, container):
5050
"""
5151
Args:
52-
container:
52+
container: The local container object.
5353
"""
5454
self.container = container
5555
self.state = "Created"
@@ -63,10 +63,10 @@ def __init__(self, container):
6363
def start(self, processing_inputs, processing_output_config, environment, processing_job_name):
6464
"""
6565
Args:
66-
processing_inputs:
67-
processing_output_config:
68-
environment:
69-
processing_job_name:
66+
processing_inputs: The processing input configuration.
67+
processing_output_config: The processing output configuration.
68+
environment: The collection of environment variables passed to the job.
69+
processing_job_name: The processing job name.
7070
"""
7171

7272
for item in processing_inputs:
@@ -132,11 +132,12 @@ def start(self, processing_inputs, processing_output_config, environment, proces
132132
self.container.process(
133133
processing_inputs, processing_output_config, environment, processing_job_name
134134
)
135+
135136
self.end_time = datetime.datetime.now()
136137
self.state = self._COMPLETED
137138

138139
def describe(self):
139-
"""Placeholder docstring"""
140+
"""Describes a local processing job."""
140141

141142
response = {
142143
"ProcessingJobArn": self.processing_job_name,

src/sagemaker/local/image.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,6 @@ def process(
143143
host, environment, processing_inputs, processing_output_config, processing_job_name
144144
)
145145

146-
# Adding region name environment variable.
147-
environment[REGION_ENV_NAME] = self.sagemaker_session.boto_region_name
148-
149146
self._generate_compose_file(
150147
"process", additional_volumes=volumes, additional_env_vars=environment
151148
)
@@ -728,7 +725,7 @@ def _create_docker_host(self, host, environment, optml_subdirs, command, volumes
728725
if self.container_entrypoint:
729726
host_config["entrypoint"] = self.container_entrypoint
730727
if self.container_arguments:
731-
host_config["entrypoint"] += self.container_arguments
728+
host_config["entrypoint"] = host_config["entrypoint"] + self.container_arguments
732729

733730
# for GPU support pass in nvidia as the runtime, this is equivalent
734731
# to setting --runtime=nvidia in the docker commandline.

tests/integ/test_local_mode.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
import tests.integ.lock as lock
2626
from tests.integ import DATA_DIR
2727

28+
from sagemaker import image_uris
29+
30+
from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor
31+
from sagemaker.sklearn.processing import SKLearnProcessor
32+
2833
from sagemaker.local import LocalSession, LocalSagemakerRuntimeClient, LocalSagemakerClient
2934
from sagemaker.mxnet import MXNet
3035

@@ -53,6 +58,22 @@ def _initialize(self, boto_session, sagemaker_client, sagemaker_runtime_client,
5358
self.local_mode = True
5459

5560

61+
@pytest.fixture(scope="module")
62+
def image_uri(
63+
sklearn_latest_version,
64+
sklearn_latest_py_version,
65+
cpu_instance_type,
66+
sagemaker_session,
67+
):
68+
return image_uris.retrieve(
69+
"sklearn",
70+
sagemaker_session.boto_region_name,
71+
version=sklearn_latest_version,
72+
py_version=sklearn_latest_py_version,
73+
instance_type=cpu_instance_type,
74+
)
75+
76+
5677
@pytest.fixture(scope="module")
5778
def mxnet_model(
5879
sagemaker_local_session, mxnet_inference_latest_version, mxnet_inference_latest_py_version
@@ -298,3 +319,106 @@ def test_local_transform_mxnet(
298319
transformer.wait()
299320

300321
assert os.path.exists(os.path.join(str(tmpdir), "data.csv.out"))
322+
323+
324+
@pytest.mark.local_mode
325+
def test_local_processing_sklearn(sagemaker_local_session, sklearn_latest_version):
326+
script_path = os.path.join(DATA_DIR, "dummy_script.py")
327+
input_file_path = os.path.join(DATA_DIR, "dummy_input.txt")
328+
329+
sklearn_processor = SKLearnProcessor(
330+
framework_version=sklearn_latest_version,
331+
role="SageMakerRole",
332+
instance_type="local",
333+
instance_count=1,
334+
command=["python3"],
335+
sagemaker_session=sagemaker_local_session,
336+
)
337+
338+
sklearn_processor.run(
339+
code=script_path,
340+
inputs=[ProcessingInput(source=input_file_path, destination="/opt/ml/processing/inputs/")],
341+
wait=False,
342+
logs=False,
343+
)
344+
345+
job_description = sklearn_processor.latest_job.describe()
346+
347+
assert len(job_description["ProcessingInputs"]) == 2
348+
assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1
349+
assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == "local"
350+
assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
351+
"python3",
352+
"/opt/ml/processing/input/code/dummy_script.py",
353+
]
354+
assert job_description["RoleArn"] == "<no_role>"
355+
356+
357+
@pytest.mark.local_mode
358+
def test_local_processing_script_processor(sagemaker_local_session, image_uri):
359+
input_file_path = os.path.join(DATA_DIR, "dummy_input.txt")
360+
361+
script_processor = ScriptProcessor(
362+
role="SageMakerRole",
363+
image_uri=image_uri,
364+
command=["python3"],
365+
instance_count=1,
366+
instance_type="local",
367+
volume_size_in_gb=30,
368+
volume_kms_key=None,
369+
max_runtime_in_seconds=3600,
370+
base_job_name="test-script-processor",
371+
env={"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"},
372+
tags=[{"Key": "dummy-tag", "Value": "dummy-tag-value"}],
373+
sagemaker_session=sagemaker_local_session,
374+
)
375+
376+
script_processor.run(
377+
code=os.path.join(DATA_DIR, "dummy_script.py"),
378+
inputs=[
379+
ProcessingInput(
380+
source=input_file_path,
381+
destination="/opt/ml/processing/input/container/path/",
382+
input_name="dummy_input",
383+
s3_data_type="S3Prefix",
384+
s3_input_mode="File",
385+
s3_data_distribution_type="FullyReplicated",
386+
s3_compression_type="None",
387+
)
388+
],
389+
outputs=[
390+
ProcessingOutput(
391+
source="/opt/ml/processing/output/container/path/",
392+
output_name="dummy_output",
393+
s3_upload_mode="EndOfJob",
394+
)
395+
],
396+
arguments=["-v"],
397+
wait=True,
398+
logs=True,
399+
)
400+
401+
job_description = script_processor.latest_job.describe()
402+
403+
assert job_description["ProcessingInputs"][0]["InputName"] == "dummy_input"
404+
405+
assert job_description["ProcessingInputs"][1]["InputName"] == "code"
406+
407+
assert job_description["ProcessingJobName"].startswith("test-script-processor")
408+
409+
assert job_description["ProcessingJobStatus"] == "Completed"
410+
411+
assert job_description["ProcessingOutputConfig"]["Outputs"][0]["OutputName"] == "dummy_output"
412+
413+
assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceCount"] == 1
414+
assert job_description["ProcessingResources"]["ClusterConfig"]["InstanceType"] == "local"
415+
assert job_description["ProcessingResources"]["ClusterConfig"]["VolumeSizeInGB"] == 30
416+
417+
assert job_description["AppSpecification"]["ContainerArguments"] == ["-v"]
418+
assert job_description["AppSpecification"]["ContainerEntrypoint"] == [
419+
"python3",
420+
"/opt/ml/processing/input/code/dummy_script.py",
421+
]
422+
assert job_description["AppSpecification"]["ImageUri"] == image_uri
423+
424+
assert job_description["Environment"] == {"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"}

0 commit comments

Comments
 (0)