@@ -146,7 +146,7 @@ def test_sklearn(sagemaker_session, sklearn_latest_version, cpu_instance_type):
146
146
assert job_description ["ProcessingResources" ]["ClusterConfig" ]["VolumeSizeInGB" ] == 30
147
147
assert job_description ["StoppingCondition" ] == {"MaxRuntimeInSeconds" : 86400 }
148
148
assert job_description ["AppSpecification" ]["ContainerEntrypoint" ] == [
149
- "/bin/bash " ,
149
+ "/bin/sh " ,
150
150
"/opt/ml/processing/input/entrypoint/runproc.sh" ,
151
151
]
152
152
assert ROLE in job_description ["RoleArn" ]
@@ -157,6 +157,7 @@ def test_sklearn_with_customizations(
157
157
sagemaker_session , image_uri , sklearn_latest_version , cpu_instance_type , output_kms_key
158
158
):
159
159
input_file_path = os .path .join (DATA_DIR , "dummy_input.txt" )
160
+ code_bundle_path = os .path .join (DATA_DIR , "dummy_code_bundle_with_reqs" )
160
161
161
162
sklearn_processor = SKLearnProcessor (
162
163
framework_version = sklearn_latest_version ,
@@ -175,7 +176,8 @@ def test_sklearn_with_customizations(
175
176
)
176
177
177
178
sklearn_processor .run (
178
- code = os .path .join (DATA_DIR , "dummy_script.py" ),
179
+ code = "main_script.py" ,
180
+ source_dir = code_bundle_path ,
179
181
inputs = [
180
182
ProcessingInput (
181
183
source = input_file_path ,
@@ -221,7 +223,7 @@ def test_sklearn_with_customizations(
221
223
222
224
assert job_description ["AppSpecification" ]["ContainerArguments" ] == ["-v" ]
223
225
assert job_description ["AppSpecification" ]["ContainerEntrypoint" ] == [
224
- "/bin/bash " ,
226
+ "/bin/sh " ,
225
227
"/opt/ml/processing/input/entrypoint/runproc.sh" ,
226
228
]
227
229
assert job_description ["AppSpecification" ]["ImageUri" ] == image_uri
@@ -309,7 +311,7 @@ def test_sklearn_with_custom_default_bucket(
309
311
310
312
assert job_description ["AppSpecification" ]["ContainerArguments" ] == ["-v" ]
311
313
assert job_description ["AppSpecification" ]["ContainerEntrypoint" ] == [
312
- "/bin/bash " ,
314
+ "/bin/sh " ,
313
315
"/opt/ml/processing/input/entrypoint/runproc.sh" ,
314
316
]
315
317
assert job_description ["AppSpecification" ]["ImageUri" ] == image_uri
@@ -362,7 +364,7 @@ def test_sklearn_with_no_inputs_or_outputs(
362
364
363
365
assert job_description ["AppSpecification" ]["ContainerArguments" ] == ["-v" ]
364
366
assert job_description ["AppSpecification" ]["ContainerEntrypoint" ] == [
365
- "/bin/bash " ,
367
+ "/bin/sh " ,
366
368
"/opt/ml/processing/input/entrypoint/runproc.sh" ,
367
369
]
368
370
assert job_description ["AppSpecification" ]["ImageUri" ] == image_uri
@@ -452,6 +454,80 @@ def test_script_processor(sagemaker_session, image_uri, cpu_instance_type, outpu
452
454
assert job_description ["StoppingCondition" ] == {"MaxRuntimeInSeconds" : 3600 }
453
455
454
456
457
@pytest.mark.release
def test_script_processor_with_source_dir(
    sagemaker_session, image_uri, cpu_instance_type, output_kms_key
):
    """Run a ScriptProcessor job whose entry point is resolved from ``source_dir``.

    The job is launched with ``code="main_script.py"`` and a ``source_dir``
    pointing at the ``dummy_code_bundle_no_reqs`` directory under ``DATA_DIR``.
    The call blocks until the job finishes (``wait=True``), after which the
    described job is checked field by field: inputs, job name and status,
    output configuration, cluster resources, app specification, role and
    stopping condition.
    """
    local_input = os.path.join(DATA_DIR, "dummy_input.txt")
    bundle_dir = os.path.join(DATA_DIR, "dummy_code_bundle_no_reqs")

    processor = ScriptProcessor(
        role=ROLE,
        image_uri=image_uri,
        command=["python3"],
        instance_count=1,
        instance_type=cpu_instance_type,
        volume_kms_key=None,
        output_kms_key=output_kms_key,
        max_runtime_in_seconds=600,
        base_job_name="test-script-processor",
        sagemaker_session=sagemaker_session,
    )

    # Describe the single input and single output up front so the run() call
    # itself stays short.
    dummy_input = ProcessingInput(
        source=local_input,
        destination="/opt/ml/processing/input/container/path/",
        input_name="dummy_input",
        s3_data_type="S3Prefix",
        s3_input_mode="File",
        s3_data_distribution_type="FullyReplicated",
        s3_compression_type="None",
    )
    dummy_output = ProcessingOutput(
        source="/opt/ml/processing/output/container/path/",
        output_name="dummy_output",
        s3_upload_mode="EndOfJob",
    )

    processor.run(
        code="main_script.py",
        source_dir=bundle_dir,
        inputs=[dummy_input],
        outputs=[dummy_output],
        arguments=["-v"],
        wait=True,
        logs=True,
    )

    described = processor.latest_job.describe()

    # The user-supplied input comes first; the second entry is expected to be
    # the one named "code".
    processing_inputs = described["ProcessingInputs"]
    assert processing_inputs[0]["InputName"] == "dummy_input"
    assert processing_inputs[1]["InputName"] == "code"

    assert described["ProcessingJobName"].startswith("test-script-processor")
    assert described["ProcessingJobStatus"] == "Completed"

    output_config = described["ProcessingOutputConfig"]
    assert output_config["KmsKeyId"] == output_kms_key
    assert output_config["Outputs"][0]["OutputName"] == "dummy_output"

    cluster = described["ProcessingResources"]["ClusterConfig"]
    assert cluster["InstanceCount"] == 1
    assert cluster["InstanceType"] == cpu_instance_type

    app_spec = described["AppSpecification"]
    assert app_spec["ContainerArguments"] == ["-v"]
    assert app_spec["ContainerEntrypoint"] == [
        "python3",
        "/opt/ml/processing/input/code/main_script.py",
    ]
    assert app_spec["ImageUri"] == image_uri

    assert ROLE in described["RoleArn"]

    assert described["StoppingCondition"] == {"MaxRuntimeInSeconds": 600}
529
+
530
+
455
531
def test_script_processor_with_no_inputs_or_outputs (
456
532
sagemaker_session , image_uri , cpu_instance_type
457
533
):
@@ -680,7 +756,7 @@ def test_processor_with_custom_bucket(
680
756
681
757
682
758
def test_sklearn_with_network_config (sagemaker_session , sklearn_latest_version , cpu_instance_type ):
683
- script_path = os .path .join (DATA_DIR , "dummy_script.py " )
759
+ code_bundle_path = os .path .join (DATA_DIR , "dummy_code_bundle_no_reqs " )
684
760
input_file_path = os .path .join (DATA_DIR , "dummy_input.txt" )
685
761
686
762
sklearn_processor = SKLearnProcessor (
@@ -697,13 +773,17 @@ def test_sklearn_with_network_config(sagemaker_session, sklearn_latest_version,
697
773
)
698
774
699
775
sklearn_processor .run (
700
- code = script_path ,
776
+ code = "main_script.py" ,
777
+ source_dir = code_bundle_path ,
701
778
inputs = [ProcessingInput (source = input_file_path , destination = "/opt/ml/processing/inputs/" )],
702
- wait = False ,
779
+ wait = True ,
703
780
logs = False ,
704
781
)
705
782
706
783
job_description = sklearn_processor .latest_job .describe ()
784
+
785
+ assert job_description ["ProcessingJobStatus" ] == "Completed"
786
+
707
787
network_config = job_description ["NetworkConfig" ]
708
788
assert network_config ["EnableInterContainerTrafficEncryption" ]
709
789
assert network_config ["EnableNetworkIsolation" ]
0 commit comments