28
28
29
29
30
30
@pytest .mark .release
31
- @pytest .mark .skipif (
32
- integ .test_region () in integ .TRAINING_NO_P2_REGIONS
33
- and integ .test_region () in integ .TRAINING_NO_P3_REGIONS ,
34
- reason = "no ml.p2 or ml.p3 instances in this region" ,
35
- )
36
31
@retry_with_instance_list (gpu_list (integ .test_region ()))
37
32
def test_framework_processing_job_with_deps (
38
33
sagemaker_session ,
39
34
huggingface_training_latest_version ,
40
35
huggingface_training_pytorch_latest_version ,
41
36
huggingface_pytorch_latest_training_py_version ,
42
- ** kwargs ,
37
+ gpu_pytorch_instance_type ,
43
38
):
44
39
with timeout (minutes = TRAINING_DEFAULT_TIMEOUT_MINUTES ):
45
40
code_path = os .path .join (DATA_DIR , "dummy_code_bundle_with_reqs" )
@@ -51,7 +46,7 @@ def test_framework_processing_job_with_deps(
51
46
py_version = huggingface_pytorch_latest_training_py_version ,
52
47
role = ROLE ,
53
48
instance_count = 1 ,
54
- instance_type = kwargs [ "instance_type" ] ,
49
+ instance_type = gpu_pytorch_instance_type ,
55
50
sagemaker_session = sagemaker_session ,
56
51
base_job_name = "test-huggingface" ,
57
52
)
@@ -64,18 +59,13 @@ def test_framework_processing_job_with_deps(
64
59
65
60
66
61
@pytest .mark .release
67
- @pytest .mark .skipif (
68
- integ .test_region () in integ .TRAINING_NO_P2_REGIONS
69
- and integ .test_region () in integ .TRAINING_NO_P3_REGIONS ,
70
- reason = "no ml.p2 or ml.p3 instances in this region" ,
71
- )
72
62
@retry_with_instance_list (gpu_list (integ .test_region ()))
73
63
def test_huggingface_training (
74
64
sagemaker_session ,
75
65
huggingface_training_latest_version ,
76
66
huggingface_training_pytorch_latest_version ,
77
67
huggingface_pytorch_latest_training_py_version ,
78
- ** kwargs ,
68
+ gpu_pytorch_instance_type ,
79
69
):
80
70
with timeout (minutes = TRAINING_DEFAULT_TIMEOUT_MINUTES ):
81
71
data_path = os .path .join (DATA_DIR , "huggingface" )
@@ -87,7 +77,7 @@ def test_huggingface_training(
87
77
transformers_version = huggingface_training_latest_version ,
88
78
pytorch_version = huggingface_training_pytorch_latest_version ,
89
79
instance_count = 1 ,
90
- instance_type = kwargs [ "instance_type" ] ,
80
+ instance_type = gpu_pytorch_instance_type ,
91
81
hyperparameters = {
92
82
"model_name_or_path" : "distilbert-base-cased" ,
93
83
"task_name" : "wnli" ,
@@ -111,17 +101,12 @@ def test_huggingface_training(
111
101
112
102
113
103
@pytest .mark .release
114
- @pytest .mark .skipif (
115
- integ .test_region () in integ .TRAINING_NO_P2_REGIONS
116
- and integ .test_region () in integ .TRAINING_NO_P3_REGIONS ,
117
- reason = "no ml.p2 or ml.p3 instances in this region" ,
118
- )
119
104
@pytest .mark .skip (
120
105
reason = "need to re enable it later t.corp:V609860141" ,
121
106
)
122
107
def test_huggingface_training_tf (
123
108
sagemaker_session ,
124
- gpu_instance_type ,
109
+ gpu_pytorch_instance_type ,
125
110
huggingface_training_latest_version ,
126
111
huggingface_training_tensorflow_latest_version ,
127
112
huggingface_tensorflow_latest_training_py_version ,
@@ -136,7 +121,7 @@ def test_huggingface_training_tf(
136
121
transformers_version = huggingface_training_latest_version ,
137
122
tensorflow_version = huggingface_training_tensorflow_latest_version ,
138
123
instance_count = 1 ,
139
- instance_type = gpu_instance_type ,
124
+ instance_type = gpu_pytorch_instance_type ,
140
125
hyperparameters = {
141
126
"model_name_or_path" : "distilbert-base-cased" ,
142
127
"per_device_train_batch_size" : 128 ,
@@ -161,7 +146,7 @@ def test_huggingface_training_tf(
161
146
)
162
147
def test_huggingface_inference (
163
148
sagemaker_session ,
164
- gpu_instance_type ,
149
+ gpu_pytorch_instance_type ,
165
150
huggingface_inference_latest_version ,
166
151
huggingface_inference_pytorch_latest_version ,
167
152
huggingface_pytorch_latest_inference_py_version ,
@@ -182,7 +167,7 @@ def test_huggingface_inference(
182
167
)
183
168
with timeout_and_delete_endpoint_by_name (endpoint_name , sagemaker_session ):
184
169
model .deploy (
185
- instance_type = gpu_instance_type , initial_instance_count = 1 , endpoint_name = endpoint_name
170
+ instance_type = gpu_pytorch_instance_type , initial_instance_count = 1 , endpoint_name = endpoint_name
186
171
)
187
172
188
173
predictor = HuggingFacePredictor (endpoint_name = endpoint_name )
0 commit comments