
Commit ed43726

pengk19 authored and knakad committed
Multi-Algorithm Hyperparameter Tuning Support
1 parent f8ac704 commit ed43726

12 files changed: 3,071 additions and 407 deletions


.flake8

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
 [flake8]
 application_import_names = sagemaker, tests
 import-order-style = google
+per-file-ignores =
+    tests/unit/test_tuner.py: F405

src/sagemaker/analytics.py

Lines changed: 53 additions & 3 deletions
@@ -142,6 +142,8 @@ def reshape(training_summary):
             out["TrainingEndTime"] = end_time
             if start_time and end_time:
                 out["TrainingElapsedTimeSeconds"] = (end_time - start_time).total_seconds()
+            if "TrainingJobDefinitionName" in training_summary:
+                out["TrainingJobDefinitionName"] = training_summary["TrainingJobDefinitionName"]
             return out

         # Run that helper over all the summaries.
@@ -152,11 +154,59 @@ def reshape(training_summary):
     def tuning_ranges(self):
         """A dictionary describing the ranges of all tuned hyperparameters. The
         keys are the names of the hyperparameter, and the values are the ranges.
+
+        The output can take one of two forms:
+
+        * If the 'TrainingJobDefinition' field is present in the job description, the output
+            is a dictionary constructed from 'ParameterRanges' in
+            'HyperParameterTuningJobConfig' of the job description. The keys are the
+            parameter names, while the values are the parameter ranges.
+            Example:
+            >>> {
+            >>>     "eta": {"MaxValue": "1", "MinValue": "0", "Name": "eta"},
+            >>>     "gamma": {"MaxValue": "10", "MinValue": "0", "Name": "gamma"},
+            >>>     "iterations": {"MaxValue": "100", "MinValue": "50", "Name": "iterations"},
+            >>>     "num_layers": {"MaxValue": "30", "MinValue": "5", "Name": "num_layers"},
+            >>> }
+        * If the 'TrainingJobDefinitions' field (list) is present in the job description,
+            the output is a dictionary with keys as the 'DefinitionName' values from
+            all items in 'TrainingJobDefinitions', and each value would be a dictionary
+            constructed from 'HyperParameterRanges' in each item in 'TrainingJobDefinitions',
+            in the same format as above.
+            Example:
+            >>> {
+            >>>     "estimator_1": {
+            >>>         "eta": {"MaxValue": "1", "MinValue": "0", "Name": "eta"},
+            >>>         "gamma": {"MaxValue": "10", "MinValue": "0", "Name": "gamma"},
+            >>>     },
+            >>>     "estimator_2": {
+            >>>         "framework": {"Values": ["TF", "MXNet"], "Name": "framework"},
+            >>>         "gamma": {"MaxValue": "1.0", "MinValue": "0.2", "Name": "gamma"}
+            >>>     }
+            >>> }
+
+        For more details about the 'TrainingJobDefinition' and 'TrainingJobDefinitions' fields
+        in the job description, see
+        https://botocore.readthedocs.io/en/latest/reference/services/sagemaker.html#SageMaker.Client.create_hyper_parameter_tuning_job
         """
+        description = self.description()
+
+        if "TrainingJobDefinition" in description:
+            return self._prepare_parameter_ranges(
+                description["HyperParameterTuningJobConfig"]["ParameterRanges"]
+            )
+
+        return {
+            training_job_definition["DefinitionName"]: self._prepare_parameter_ranges(
+                training_job_definition["HyperParameterRanges"]
+            )
+            for training_job_definition in description["TrainingJobDefinitions"]
+        }
+
+    def _prepare_parameter_ranges(self, parameter_ranges):
+        """Convert parameter ranges to a dictionary using the parameter range names as the keys"""
         out = {}
-        for _, ranges in self.description()["HyperParameterTuningJobConfig"][
-            "ParameterRanges"
-        ].items():
+        for _, ranges in parameter_ranges.items():
             for param in ranges:
                 out[param["Name"]] = param
         return out
src/sagemaker/session.py

Lines changed: 308 additions & 40 deletions
Large diffs are not rendered by default.

src/sagemaker/tensorflow/predictor.py

Lines changed: 0 additions & 1 deletion
@@ -18,7 +18,6 @@
 import google.protobuf.json_format as json_format
 from google.protobuf.message import DecodeError
 from protobuf_to_dict import protobuf_to_dict
-
 from sagemaker.content_types import CONTENT_TYPE_JSON, CONTENT_TYPE_OCTET_STREAM, CONTENT_TYPE_CSV
 from sagemaker.predictor import json_serializer, csv_serializer

src/sagemaker/tuner.py

Lines changed: 795 additions & 126 deletions
Large diffs are not rendered by default.
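
The full HyperparameterTuner changes live in this un-rendered tuner.py diff, but the airflow.py changes below rely on the new factory method HyperparameterTuner.create() and its per-estimator dictionaries (estimator_dict, objective_metric_name_dict, hyperparameter_ranges_dict, metric_definitions_dict). A rough sketch of constructing and fitting a multi-algorithm tuner follows; the estimator settings, metric names, ranges, and any create()/fit() arguments not referenced in this diff are assumptions, not the definitive API:

from sagemaker.estimator import Estimator
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner, IntegerParameter

# Placeholder estimators; training images, role, and instance settings are illustrative.
estimator_1 = Estimator(image_name="<training-image-1>", role="<role-arn>",
                        train_instance_count=1, train_instance_type="ml.m4.xlarge")
estimator_2 = Estimator(image_name="<training-image-2>", role="<role-arn>",
                        train_instance_count=1, train_instance_type="ml.m4.xlarge")

# Multi-algorithm tuner: every dictionary is keyed by the estimator names chosen here.
tuner = HyperparameterTuner.create(
    estimator_dict={"estimator_1": estimator_1, "estimator_2": estimator_2},
    objective_metric_name_dict={"estimator_1": "validation:auc", "estimator_2": "validation:auc"},
    hyperparameter_ranges_dict={
        "estimator_1": {"eta": ContinuousParameter(0, 1)},
        "estimator_2": {"num_layers": IntegerParameter(5, 30)},
    },
    metric_definitions_dict={
        "estimator_2": [{"Name": "validation:auc", "Regex": "auc: ([0-9\\.]+)"}],
    },
    max_jobs=10,
    max_parallel_jobs=2,
)

# Inputs (and the include_cls_metadata flag) use the same per-estimator keys.
tuner.fit(
    inputs={"estimator_1": "s3://bucket/data-1", "estimator_2": "s3://bucket/data-2"},
    include_cls_metadata={},
)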

src/sagemaker/workflow/airflow.py

Lines changed: 164 additions & 41 deletions
@@ -239,8 +239,8 @@ def training_config(estimator, inputs=None, job_name=None, mini_batch_size=None)
     return train_config


-def tuning_config(tuner, inputs, job_name=None):
-    """Export Airflow tuning config from an estimator
+def tuning_config(tuner, inputs, job_name=None, include_cls_metadata=False, mini_batch_size=None):
+    """Export Airflow tuning config from a HyperparameterTuner

     Args:
         tuner (sagemaker.tuner.HyperparameterTuner): The tuner to export tuning
@@ -266,64 +266,187 @@ def tuning_config(tuner, inputs, job_name=None):
             * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
               :class:~`sagemaker.amazon.amazon_estimator.RecordSet` objects,
               where each instance is a different channel of training data.
+
+            * (dict[str, one of the forms above]) - Required only by tuners created via
+                the factory method ``HyperparameterTuner.create()``. The keys should be the
+                same estimator names as keys for the ``estimator_dict`` argument of the
+                ``HyperparameterTuner.create()`` method.
         job_name (str): Specify a tuning job name if needed.
+        include_cls_metadata: It can take one of the following two forms.
+
+            * (bool) - Whether or not the hyperparameter tuning job should include information
+                about the estimator class (default: False). This information is passed as a
+                hyperparameter, so if the algorithm you are using cannot handle unknown
+                hyperparameters (e.g. an Amazon SageMaker built-in algorithm that does not
+                have a custom estimator in the Python SDK), then set ``include_cls_metadata``
+                to ``False``.
+            * (dict[str, bool]) - This version should be used for tuners created via the factory
+                method ``HyperparameterTuner.create()``, to specify the flag for individual
+                estimators provided in the ``estimator_dict`` argument of the method. The keys
+                would be the same estimator names as in ``estimator_dict``. If one estimator
+                doesn't need the flag set, then no need to include it in the dictionary. If none
+                of the estimators need the flag set, then an empty dictionary ``{}`` must be used.
+
+        mini_batch_size: It can take one of the following two forms.
+
+            * (int) - Specify this argument only when estimator is a built-in estimator of an
+                Amazon algorithm. For other estimators, batch size should be specified in the
+                estimator.
+            * (dict[str, int]) - This version should be used for tuners created via the factory
+                method ``HyperparameterTuner.create()``, to specify the value for individual
+                estimators provided in the ``estimator_dict`` argument of the method. The keys
+                would be the same estimator names as in ``estimator_dict``. If one estimator
+                doesn't need the value set, then no need to include it in the dictionary. If
+                none of the estimators need the value set, then an empty dictionary ``{}``
+                must be used.

     Returns:
         dict: Tuning config that can be directly used by SageMakerTuningOperator in Airflow.
     """
-    train_config = training_base_config(tuner.estimator, inputs)
-    hyperparameters = train_config.pop("HyperParameters", None)
-    s3_operations = train_config.pop("S3Operations", None)

-    if hyperparameters and len(hyperparameters) > 0:
-        tuner.static_hyperparameters = {
-            utils.to_str(k): utils.to_str(v) for (k, v) in hyperparameters.items()
-        }
+    tuner._prepare_job_name_for_tuning(job_name=job_name)

-    if job_name is not None:
-        tuner._current_job_name = job_name
-    else:
-        base_name = tuner.base_tuning_job_name or utils.base_name_from_image(
-            tuner.estimator.train_image()
+    tune_config = {
+        "HyperParameterTuningJobName": tuner._current_job_name,
+        "HyperParameterTuningJobConfig": _extract_tuning_job_config(tuner),
+    }
+
+    if tuner.estimator:
+        tune_config[
+            "TrainingJobDefinition"
+        ], s3_operations = _extract_training_config_from_estimator(
+            tuner, inputs, include_cls_metadata, mini_batch_size
         )
-        tuner._current_job_name = utils.name_from_base(
-            base_name, tuner.TUNING_JOB_NAME_MAX_LENGTH, True
+    else:
+        tune_config[
+            "TrainingJobDefinitions"
+        ], s3_operations = _extract_training_config_list_from_estimator_dict(
+            tuner, inputs, include_cls_metadata, mini_batch_size
         )

-    for hyperparameter_name in tuner._hyperparameter_ranges.keys():
-        tuner.static_hyperparameters.pop(hyperparameter_name, None)
+    if s3_operations:
+        tune_config["S3Operations"] = s3_operations

-    train_config["StaticHyperParameters"] = tuner.static_hyperparameters
+    if tuner.tags:
+        tune_config["Tags"] = tuner.tags

-    tune_config = {
-        "HyperParameterTuningJobName": tuner._current_job_name,
-        "HyperParameterTuningJobConfig": {
-            "Strategy": tuner.strategy,
-            "HyperParameterTuningJobObjective": {
-                "Type": tuner.objective_type,
-                "MetricName": tuner.objective_metric_name,
-            },
-            "ResourceLimits": {
-                "MaxNumberOfTrainingJobs": tuner.max_jobs,
-                "MaxParallelTrainingJobs": tuner.max_parallel_jobs,
-            },
-            "ParameterRanges": tuner.hyperparameter_ranges(),
+    if tuner.warm_start_config:
+        tune_config["WarmStartConfig"] = tuner.warm_start_config.to_input_req()
+
+    return tune_config
+
+
+def _extract_tuning_job_config(tuner):
+    """Extract tuning job config from a HyperparameterTuner"""
+    tuning_job_config = {
+        "Strategy": tuner.strategy,
+        "ResourceLimits": {
+            "MaxNumberOfTrainingJobs": tuner.max_jobs,
+            "MaxParallelTrainingJobs": tuner.max_parallel_jobs,
         },
-        "TrainingJobDefinition": train_config,
+        "TrainingJobEarlyStoppingType": tuner.early_stopping_type,
     }

-    if tuner.metric_definitions is not None:
-        tune_config["TrainingJobDefinition"]["AlgorithmSpecification"][
+    if tuner.objective_metric_name:
+        tuning_job_config["HyperParameterTuningJobObjective"] = {
+            "Type": tuner.objective_type,
+            "MetricName": tuner.objective_metric_name,
+        }
+
+    parameter_ranges = tuner.hyperparameter_ranges()
+    if parameter_ranges:
+        tuning_job_config["ParameterRanges"] = parameter_ranges
+
+    if tuner.training_instance_pools:
+        tuning_job_config["TrainingJobInstancePools"] = [
+            {
+                "InstanceType": instance_type,
+                "PoolSize": tuner.training_instance_pools[instance_type],
+            }
+            for instance_type in sorted(tuner.training_instance_pools.keys())
+        ]
+
+    return tuning_job_config
+
+
+def _extract_training_config_from_estimator(tuner, inputs, include_cls_metadata, mini_batch_size):
+    """Extract training job config from a HyperparameterTuner that uses the ``estimator`` field"""
+    train_config = training_base_config(tuner.estimator, inputs, mini_batch_size)
+    train_config.pop("HyperParameters", None)
+
+    tuner._prepare_static_hyperparameters_for_tuning(include_cls_metadata=include_cls_metadata)
+    train_config["StaticHyperParameters"] = tuner.static_hyperparameters
+
+    if tuner.metric_definitions:
+        train_config["AlgorithmSpecification"]["MetricDefinitions"] = tuner.metric_definitions
+
+    s3_operations = train_config.pop("S3Operations", None)
+    return train_config, s3_operations
+
+
+def _extract_training_config_list_from_estimator_dict(
+    tuner, inputs, include_cls_metadata, mini_batch_size
+):
+    """
+    Extract a list of training job configs from a HyperparameterTuner that uses the
+    ``estimator_dict`` field
+    """
+    estimator_names = sorted(tuner.estimator_dict.keys())
+    tuner._validate_dict_argument(name="inputs", value=inputs, allowed_keys=estimator_names)
+    tuner._validate_dict_argument(
+        name="include_cls_metadata", value=include_cls_metadata, allowed_keys=estimator_names
+    )
+    tuner._validate_dict_argument(
+        name="mini_batch_size", value=mini_batch_size, allowed_keys=estimator_names
+    )
+
+    train_config_dict = {}
+    for (estimator_name, estimator) in tuner.estimator_dict.items():
+        train_config_dict[estimator_name] = training_base_config(
+            estimator=estimator,
+            inputs=inputs.get(estimator_name) if inputs else None,
+            mini_batch_size=mini_batch_size.get(estimator_name) if mini_batch_size else None,
+        )
+
+    tuner._prepare_static_hyperparameters_for_tuning(include_cls_metadata=include_cls_metadata)
+
+    train_config_list = []
+    s3_operations_list = []
+
+    for estimator_name in sorted(train_config_dict.keys()):
+        train_config = train_config_dict[estimator_name]
+        train_config.pop("HyperParameters", None)
+        train_config["StaticHyperParameters"] = tuner.static_hyperparameters_dict[estimator_name]
+
+        train_config["AlgorithmSpecification"][
             "MetricDefinitions"
-        ] = tuner.metric_definitions
+        ] = tuner.metric_definitions_dict.get(estimator_name)

-    if tuner.tags is not None:
-        tune_config["Tags"] = tuner.tags
+        train_config["DefinitionName"] = estimator_name
+        train_config["TuningObjective"] = {
+            "Type": tuner.objective_type,
+            "MetricName": tuner.objective_metric_name_dict[estimator_name],
+        }
+        train_config["HyperParameterRanges"] = tuner.hyperparameter_ranges_dict()[estimator_name]

-    if s3_operations is not None:
-        tune_config["S3Operations"] = s3_operations
+        s3_operations_list.append(train_config.pop("S3Operations", {}))

-    return tune_config
+        train_config_list.append(train_config)
+
+    return train_config_list, _merge_s3_operations(s3_operations_list)
+
+
+def _merge_s3_operations(s3_operations_list):
+    """Merge a list of S3 operation dictionaries into one"""
+    s3_operations_merged = {}
+    for s3_operations in s3_operations_list:
+        for (key, operations) in s3_operations.items():
+            if key not in s3_operations_merged:
+                s3_operations_merged[key] = []
+            for operation in operations:
+                if operation not in s3_operations_merged[key]:
+                    s3_operations_merged[key].append(operation)
+    return s3_operations_merged


 def update_submit_s3_uri(estimator, job_name):
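
With the updated signature, exporting an Airflow tuning config from a multi-algorithm tuner passes the per-estimator dictionary forms described in the docstring above. A minimal sketch, assuming `tuner` was built with HyperparameterTuner.create() as in the earlier sketch and that the S3 inputs are placeholders:

from sagemaker.workflow.airflow import tuning_config

# Keys must match the estimator names used in the tuner's estimator_dict.
config = tuning_config(
    tuner=tuner,
    inputs={"estimator_1": "s3://bucket/data-1", "estimator_2": "s3://bucket/data-2"},
    job_name="my-multi-algo-tuning-job",
    include_cls_metadata={},  # none of the estimators needs class metadata
    mini_batch_size={},       # none of the estimators is an Amazon built-in needing a batch size
)

# For a multi-algorithm tuner, the config carries a TrainingJobDefinitions list (one entry per
# estimator) and can be handed to Airflow's SageMakerTuningOperator via its config argument.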
