
breaking: default wait=True for HyperparameterTuner.fit() and Transformer.transform() #1790

Merged
5 commits merged on Aug 1, 2020
8 changes: 4 additions & 4 deletions src/sagemaker/transformer.py
@@ -121,8 +121,8 @@ def transform(
         join_source=None,
         experiment_config=None,
         model_client_config=None,
-        wait=False,
-        logs=False,
+        wait=True,
+        logs=True,
     ):
         """Start a new transform job.

@@ -178,9 +178,9 @@ def transform(
                 'InvocationsTimeoutInSeconds', and 'InvocationsMaxRetries'.
                 (default: ``None``).
             wait (bool): Whether the call should wait until the job completes
-                (default: False).
+                (default: ``True``).
             logs (bool): Whether to show the logs produced by the job.
-                Only meaningful when wait is True (default: False).
+                Only meaningful when wait is ``True`` (default: ``True``).
         """
         local_mode = self.sagemaker_session.local_mode
         if not local_mode and not data.startswith("s3://"):
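With this change, `Transformer.transform()` blocks until the batch transform job finishes and tails its logs by default; callers that relied on the old fire-and-forget behavior must now pass `wait=False` explicitly. A minimal usage sketch of both call styles (the model name, instance type, and S3 paths below are illustrative placeholders, not taken from this PR):

```python
from sagemaker.transformer import Transformer

# Hypothetical transformer; any existing SageMaker model name and S3 paths would do.
transformer = Transformer(
    model_name="my-model",
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path="s3://my-bucket/transform-output",
)

# New default: the call blocks until the job completes and streams its logs.
transformer.transform("s3://my-bucket/transform-input", content_type="text/csv")

# Or, to keep the old behavior: start the job, return immediately, and wait later.
transformer.transform("s3://my-bucket/transform-input", content_type="text/csv", wait=False)
transformer.wait()
```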
8 changes: 5 additions & 3 deletions src/sagemaker/tuner.py
@@ -369,6 +369,7 @@ def fit(
         job_name=None,
         include_cls_metadata=False,
         estimator_kwargs=None,
+        wait=True,
         **kwargs
     ):
         """Start a hyperparameter tuning job.
@@ -424,6 +425,7 @@ def fit(
                 The keys are the estimator names for the estimator_dict argument of create()
                 method. Each value is a dictionary for the other arguments needed for training
                 of the corresponding estimator.
+            wait (bool): Whether the call should wait until the job completes (default: ``True``).
             **kwargs: Other arguments needed for training. Please refer to the
                 ``fit()`` method of the associated estimator to see what other
                 arguments are needed.
@@ -433,6 +435,9 @@
         else:
             self._fit_with_estimator_dict(inputs, job_name, include_cls_metadata, estimator_kwargs)

+        if wait:
+            self.latest_tuning_job.wait()
+
     def _fit_with_estimator(self, inputs, job_name, include_cls_metadata, **kwargs):
         """Start tuning for tuner instances that have the ``estimator`` field set"""
         self._prepare_estimator_for_tuning(self.estimator, inputs, job_name, **kwargs)
@@ -1447,9 +1452,6 @@ def start_new(cls, tuner, inputs):
             sagemaker.tuner._TuningJob: Constructed object that captures all
                 information about the started job.
         """
-
-        logger.info("_TuningJob.start_new!!!")
-
         warm_start_config_req = None
         if tuner.warm_start_config:
             warm_start_config_req = tuner.warm_start_config.to_input_req()
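Similarly, `HyperparameterTuner.fit()` now waits for the tuning job to reach a terminal state by default (via `self.latest_tuning_job.wait()`); passing `wait=False` restores the previous non-blocking behavior. A rough sketch, assuming an already-configured `estimator` and illustrative metric/range names that are not part of this PR:

```python
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

# `estimator` is assumed to be an existing, fully configured SageMaker Estimator.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name="validation:accuracy",  # illustrative metric name
    hyperparameter_ranges={"learning_rate": ContinuousParameter(0.01, 0.2)},
    max_jobs=2,
    max_parallel_jobs=2,
)

# New default: fit() blocks until the tuning job completes.
tuner.fit({"train": "s3://my-bucket/train"})

# Old behavior: start the job and return immediately; wait explicitly when needed.
tuner.fit({"train": "s3://my-bucket/train"}, wait=False)
print(tuner.latest_tuning_job.name)
tuner.wait()
```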
2 changes: 1 addition & 1 deletion tests/integ/test_transformer.py
@@ -325,7 +325,7 @@ def test_single_transformer_multiple_jobs(

 def test_stop_transform_job(mxnet_estimator, mxnet_transform_input, cpu_instance_type):
     transformer = mxnet_estimator.transformer(1, cpu_instance_type)
-    transformer.transform(mxnet_transform_input, content_type="text/csv")
+    transformer.transform(mxnet_transform_input, content_type="text/csv", wait=False)

     time.sleep(15)

60 changes: 14 additions & 46 deletions tests/integ/test_tuner.py
@@ -130,7 +130,7 @@ def _tune(
     hyperparameter_ranges=None,
     job_name=None,
     warm_start_config=None,
-    wait_till_terminal=True,
+    wait=True,
     max_jobs=2,
     max_parallel_jobs=2,
     early_stopping_type="Off",
@@ -152,11 +152,8 @@
         records = kmeans_estimator.record_set(kmeans_train_set[0][:100])
         test_record_set = kmeans_estimator.record_set(kmeans_train_set[0][:100], channel="test")

-        tuner.fit([records, test_record_set], job_name=job_name)
-        print("Started hyperparameter tuning job with name:" + tuner.latest_tuning_job.name)
-
-        if wait_till_terminal:
-            tuner.wait()
+        print("Started hyperparameter tuning job with name: {}".format(job_name))
+        tuner.fit([records, test_record_set], job_name=job_name, wait=wait)

     return tuner

@@ -388,7 +385,7 @@ def test_tuning_kmeans_identical_dataset_algorithm_tuner_from_non_terminal_paren
         kmeans_train_set,
         job_name=parent_tuning_job_name,
         hyperparameter_ranges=hyperparameter_ranges,
-        wait_till_terminal=False,
+        wait=False,
         max_parallel_jobs=1,
         max_jobs=1,
     )
@@ -453,15 +450,9 @@ def test_tuning_lda(sagemaker_session, cpu_instance_type):
         )

         tuning_job_name = unique_name_from_base("test-lda", max_length=32)
+        print("Started hyperparameter tuning job with name:" + tuning_job_name)
         tuner.fit([record_set, test_record_set], mini_batch_size=1, job_name=tuning_job_name)
-
-        latest_tuning_job_name = tuner.latest_tuning_job.name
-
-        print("Started hyperparameter tuning job with name:" + latest_tuning_job_name)
-
-        time.sleep(15)
-        tuner.wait()

         attached_tuner = HyperparameterTuner.attach(
             tuning_job_name, sagemaker_session=sagemaker_session
         )
@@ -516,7 +507,7 @@ def test_stop_tuning_job(sagemaker_session, cpu_instance_type):
     )

     tuning_job_name = unique_name_from_base("test-randomcutforest", max_length=32)
-    tuner.fit([records, test_records], tuning_job_name)
+    tuner.fit([records, test_records], tuning_job_name, wait=False)

     time.sleep(15)

@@ -575,12 +566,8 @@ def test_tuning_mxnet(
         )

         tuning_job_name = unique_name_from_base("tune-mxnet", max_length=32)
-        tuner.fit({"train": train_input, "test": test_input}, job_name=tuning_job_name)
-
-        print("Started hyperparameter tuning job with name:" + tuning_job_name)
-
-        time.sleep(15)
-        tuner.wait()
+        tuner.fit({"train": train_input, "test": test_input}, job_name=tuning_job_name)

     best_training_job = tuner.best_training_job()
     with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
@@ -628,12 +615,8 @@ def test_tuning_tf(
         )

         tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
-        tuner.fit(inputs, job_name=tuning_job_name)
-
-        print("Started hyperparameter tuning job with name: " + tuning_job_name)
-
-        time.sleep(15)
-        tuner.wait()
+        tuner.fit(inputs, job_name=tuning_job_name)


def test_tuning_tf_vpc_multi(
@@ -686,12 +669,8 @@ def test_tuning_tf_vpc_multi(
         )

         tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
-        tuner.fit(inputs, job_name=tuning_job_name)
-
-        print(f"Started hyperparameter tuning job with name: {tuning_job_name}")
-
-        time.sleep(15)
-        tuner.wait()
+        tuner.fit(inputs, job_name=tuning_job_name)


@pytest.mark.canary_quick
@@ -740,13 +719,9 @@ def test_tuning_chainer(
         )

         tuning_job_name = unique_name_from_base("chainer", max_length=32)
+        print("Started hyperparameter tuning job with name: {}".format(tuning_job_name))
         tuner.fit({"train": train_input, "test": test_input}, job_name=tuning_job_name)

-        print("Started hyperparameter tuning job with name:" + tuning_job_name)
-
-        time.sleep(15)
-        tuner.wait()
-
     best_training_job = tuner.best_training_job()
     with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
         predictor = tuner.deploy(1, cpu_instance_type)
@@ -812,13 +787,9 @@ def test_attach_tuning_pytorch(
         )

         tuning_job_name = unique_name_from_base("pytorch", max_length=32)
+        print("Started hyperparameter tuning job with name: {}".format(tuning_job_name))
         tuner.fit({"training": training_data}, job_name=tuning_job_name)

-        print("Started hyperparameter tuning job with name:" + tuning_job_name)
-
-        time.sleep(15)
-        tuner.wait()
-
     endpoint_name = tuning_job_name
     model_name = "model-name-1"
     attached_tuner = HyperparameterTuner.attach(
@@ -887,17 +858,14 @@ def test_tuning_byo_estimator(sagemaker_session, cpu_instance_type):
             max_parallel_jobs=2,
         )

+        tuning_job_name = unique_name_from_base("byo", 32)
+        print("Started hyperparameter tuning job with name {}:".format(tuning_job_name))
         tuner.fit(
             {"train": s3_train_data, "test": s3_train_data},
             include_cls_metadata=False,
-            job_name=unique_name_from_base("byo", 32),
+            job_name=tuning_job_name,
         )

-        print("Started hyperparameter tuning job with name:" + tuner.latest_tuning_job.name)
-
-        time.sleep(15)
-        tuner.wait()
-
     best_training_job = tuner.best_training_job()
     with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
         predictor = tuner.deploy(