|
25 | 25 | import tests.integ.lock as lock
|
26 | 26 | from tests.integ import DATA_DIR
|
27 | 27 |
|
| 28 | +from sagemaker import image_uris |
| 29 | + |
| 30 | +from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor |
| 31 | +from sagemaker.sklearn.processing import SKLearnProcessor |
| 32 | + |
28 | 33 | from sagemaker.local import LocalSession, LocalSagemakerRuntimeClient, LocalSagemakerClient
|
29 | 34 | from sagemaker.mxnet import MXNet
|
30 | 35 |
|
@@ -53,6 +58,22 @@ def _initialize(self, boto_session, sagemaker_client, sagemaker_runtime_client,
|
53 | 58 | self.local_mode = True
|
54 | 59 |
|
55 | 60 |
|
@pytest.fixture(scope="module")
def image_uri(
    sklearn_latest_version,
    sklearn_latest_py_version,
    cpu_instance_type,
    sagemaker_session,
):
    """Module-scoped fixture returning the image URI retrieved for "sklearn".

    The lookup uses the boto region of ``sagemaker_session`` together with the
    framework version, Python version and CPU instance type supplied by the
    other fixtures.
    """
    region = sagemaker_session.boto_region_name
    lookup_options = {
        "version": sklearn_latest_version,
        "py_version": sklearn_latest_py_version,
        "instance_type": cpu_instance_type,
    }
    return image_uris.retrieve("sklearn", region, **lookup_options)
| 75 | + |
| 76 | + |
56 | 77 | @pytest.fixture(scope="module")
|
57 | 78 | def mxnet_model(
|
58 | 79 | sagemaker_local_session, mxnet_inference_latest_version, mxnet_inference_latest_py_version
|
@@ -298,3 +319,106 @@ def test_local_transform_mxnet(
|
298 | 319 | transformer.wait()
|
299 | 320 |
|
300 | 321 | assert os.path.exists(os.path.join(str(tmpdir), "data.csv.out"))
|
| 322 | + |
| 323 | + |
@pytest.mark.local_mode
def test_local_processing_sklearn(sagemaker_local_session, sklearn_latest_version):
    """Run a dummy script via ``SKLearnProcessor`` on a "local" instance.

    The job is started without waiting or log streaming, and the description
    of the latest job is then checked for inputs, cluster config, container
    entrypoint and role.
    """
    processor = SKLearnProcessor(
        framework_version=sklearn_latest_version,
        role="SageMakerRole",
        instance_type="local",
        instance_count=1,
        command=["python3"],
        sagemaker_session=sagemaker_local_session,
    )

    dummy_input = ProcessingInput(
        source=os.path.join(DATA_DIR, "dummy_input.txt"),
        destination="/opt/ml/processing/inputs/",
    )
    processor.run(
        code=os.path.join(DATA_DIR, "dummy_script.py"),
        inputs=[dummy_input],
        wait=False,
        logs=False,
    )

    description = processor.latest_job.describe()

    # Only one input is passed explicitly, yet two are expected in the
    # description -- presumably the second is the code script; confirm
    # against the processor implementation.
    assert len(description["ProcessingInputs"]) == 2

    cluster_config = description["ProcessingResources"]["ClusterConfig"]
    assert cluster_config["InstanceCount"] == 1
    assert cluster_config["InstanceType"] == "local"

    expected_entrypoint = ["python3", "/opt/ml/processing/input/code/dummy_script.py"]
    assert description["AppSpecification"]["ContainerEntrypoint"] == expected_entrypoint

    # The description is expected to carry the "<no_role>" placeholder rather
    # than the "SageMakerRole" name given to the processor.
    assert description["RoleArn"] == "<no_role>"
| 355 | + |
| 356 | + |
@pytest.mark.local_mode
def test_local_processing_script_processor(sagemaker_local_session, image_uri):
    """Run a dummy script via ``ScriptProcessor`` on a "local" instance.

    The job is run with one named input, one named output and a single
    container argument, waiting for completion with logs enabled. The
    description of the latest job is then compared against everything that
    was configured.
    """
    processor = ScriptProcessor(
        role="SageMakerRole",
        image_uri=image_uri,
        command=["python3"],
        instance_count=1,
        instance_type="local",
        volume_size_in_gb=30,
        volume_kms_key=None,
        max_runtime_in_seconds=3600,
        base_job_name="test-script-processor",
        env={"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"},
        tags=[{"Key": "dummy-tag", "Value": "dummy-tag-value"}],
        sagemaker_session=sagemaker_local_session,
    )

    dummy_input = ProcessingInput(
        source=os.path.join(DATA_DIR, "dummy_input.txt"),
        destination="/opt/ml/processing/input/container/path/",
        input_name="dummy_input",
        s3_data_type="S3Prefix",
        s3_input_mode="File",
        s3_data_distribution_type="FullyReplicated",
        s3_compression_type="None",
    )
    dummy_output = ProcessingOutput(
        source="/opt/ml/processing/output/container/path/",
        output_name="dummy_output",
        s3_upload_mode="EndOfJob",
    )

    processor.run(
        code=os.path.join(DATA_DIR, "dummy_script.py"),
        inputs=[dummy_input],
        outputs=[dummy_output],
        arguments=["-v"],
        wait=True,
        logs=True,
    )

    description = processor.latest_job.describe()

    # The explicit input is expected first, followed by an input named "code".
    job_inputs = description["ProcessingInputs"]
    assert job_inputs[0]["InputName"] == "dummy_input"
    assert job_inputs[1]["InputName"] == "code"

    assert description["ProcessingJobName"].startswith("test-script-processor")
    assert description["ProcessingJobStatus"] == "Completed"

    job_outputs = description["ProcessingOutputConfig"]["Outputs"]
    assert job_outputs[0]["OutputName"] == "dummy_output"

    cluster_config = description["ProcessingResources"]["ClusterConfig"]
    assert cluster_config["InstanceCount"] == 1
    assert cluster_config["InstanceType"] == "local"
    assert cluster_config["VolumeSizeInGB"] == 30

    app_spec = description["AppSpecification"]
    assert app_spec["ContainerArguments"] == ["-v"]
    assert app_spec["ContainerEntrypoint"] == [
        "python3",
        "/opt/ml/processing/input/code/dummy_script.py",
    ]
    assert app_spec["ImageUri"] == image_uri

    assert description["Environment"] == {"DUMMY_ENVIRONMENT_VARIABLE": "dummy-value"}
0 commit comments