aws
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎CHANGELOG.rst
Lines changed: 20 additions & 1 deletion b/‎CHANGELOG.rst
Lines changed: 20 additions & 1 deletion
diff --git a/‎README.rst
Lines changed: 80 additions & 2037 deletions b/‎README.rst
Lines changed: 80 additions & 2037 deletions
diff --git a/‎doc/analytics.rst
Lines changed: 17 additions & 0 deletions b/‎doc/analytics.rst
Lines changed: 17 additions & 0 deletions
diff --git a/‎doc/index.rst
Lines changed: 3 additions & 1 deletion b/‎doc/index.rst
Lines changed: 3 additions & 1 deletion
diff --git a/‎doc/tuner.rst
Lines changed: 22 additions & 0 deletions b/‎doc/tuner.rst
Lines changed: 22 additions & 0 deletions
diff --git a/‎setup.py
Lines changed: 2 additions & 2 deletions b/‎setup.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/sagemaker/__init__.py
Lines changed: 19 additions & 26 deletions b/‎src/sagemaker/__init__.py
Lines changed: 19 additions & 26 deletions
diff --git a/‎src/sagemaker/amazon/README.rst
Lines changed: 48 additions & 0 deletions b/‎src/sagemaker/amazon/README.rst
Lines changed: 48 additions & 0 deletions
diff --git a/‎src/sagemaker/amazon/amazon_estimator.py
Lines changed: 48 additions & 9 deletions b/‎src/sagemaker/amazon/amazon_estimator.py
Lines changed: 48 additions & 9 deletions
diff --git a/‎src/sagemaker/amazon/hyperparameter.py
Lines changed: 0 additions & 1 deletion b/‎src/sagemaker/amazon/hyperparameter.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/sagemaker/amazon/kmeans.py
Lines changed: 2 additions & 2 deletions b/‎src/sagemaker/amazon/kmeans.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/sagemaker/amazon/lda.py
Lines changed: 6 additions & 3 deletions b/‎src/sagemaker/amazon/lda.py
Lines changed: 6 additions & 3 deletions
diff --git a/‎src/sagemaker/amazon/linear_learner.py
Lines changed: 15 additions & 4 deletions b/‎src/sagemaker/amazon/linear_learner.py
Lines changed: 15 additions & 4 deletions
diff --git a/‎src/sagemaker/amazon/ntm.py
Lines changed: 2 additions & 2 deletions b/‎src/sagemaker/amazon/ntm.py
Lines changed: 2 additions & 2 deletions
@@ -24,3 +24,4 @@ doc/_templates
 venv/
 *~
 .pytest_cache/
+*.swp
@@ -2,10 +2,29 @@
 CHANGELOG
 =========
 
-1.3.dev1
+1.4.2dev
 ========
 
+* bug-fix: Unit Tests: Improve unit test runtime
+* bug-fix: Estimators: Fix attach for LDA
+* bug-fix: Estimators: allow code_location to have no key prefix
+
+1.4.1
+=====
+
+* bug-fix: Local Mode: Fix for non Framework containers
+
+1.4.0
+=====
+
+* bug-fix: Remove __all__ and add noqa in __init__
 * bug-fix: Estimators: Change max_iterations hyperparameter key for KMeans
+* bug-fix: Estimators: Remove unused argument job_details for ``EstimatorBase.attach()``
+* bug-fix: Local Mode: Show logs in Jupyter notebooks
+* feature: HyperparameterTuner: Add support for hyperparameter tuning jobs
+* feature: Analytics: Add functions for metrics in Training and Hyperparameter Tuning jobs
+* feature: Estimators: add support for tagging training jobs
+
 
 1.3.0
 =====
 
@@ -0,0 +1,17 @@
+Analytics
+---------
+
+.. autoclass:: sagemaker.analytics.AnalyticsMetricsBase
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.analytics.HyperparameterTuningJobAnalytics
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.analytics.TrainingJobAnalytics
+    :members:
+    :undoc-members:
+    :show-inheritance:
@@ -4,7 +4,7 @@ Amazon SageMaker Python SDK is an open source library for training and deploying
 
 With the SDK, you can train and deploy models using popular deep learning frameworks: **Apache MXNet** and **TensorFlow**. You can also train and deploy models with **algorithms provided by Amazon**, these are scalable implementations of core machine learning algorithms that are optimized for SageMaker and GPU training. If you have **your own algorithms** built into SageMaker-compatible Docker containers, you can train and host models using these as well.
 
-Here you'll find API docs for SageMaker Python SDK. The project home-page is in Github: https://github.com/aws/sagemaker-python-sdk, there you can find the SDK source, installation instructions and a general overview of the library there. 
+Here you'll find API docs for SageMaker Python SDK. The project home-page is on GitHub: https://github.com/aws/sagemaker-python-sdk, where you can find the SDK source, installation instructions and a general overview of the library.
 
 Overview
 ----------
@@ -14,9 +14,11 @@ The SageMaker Python SDK consists of a few primary interfaces:
     :maxdepth: 2
 
     estimators
+    tuner
     predictors
     session
     model
+    analytics
 
 MXNet
 ----------
 
@@ -0,0 +1,22 @@
+HyperparameterTuner
+-------------------
+
+.. autoclass:: sagemaker.tuner.HyperparameterTuner
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.tuner.ContinuousParameter
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.tuner.IntegerParameter
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.tuner.CategoricalParameter
+    :members:
+    :undoc-members:
+    :show-inheritance:
@@ -23,7 +23,7 @@ def read(fname):
 
 
 setup(name="sagemaker",
-      version="1.3.0",
+      version="1.4.1",
       description="Open source library for training and deploying models on Amazon SageMaker.",
       packages=find_packages('src'),
       package_dir={'': 'src'},
@@ -49,7 +49,7 @@ def read(fname):
 
       extras_require={
           'test': ['tox', 'flake8', 'pytest', 'pytest-cov', 'pytest-xdist',
-                   'mock', 'tensorflow>=1.3.0', 'contextlib2', 'awslogs']},
+                   'mock', 'tensorflow>=1.3.0', 'contextlib2', 'awslogs', 'pandas']},
 
       entry_points={
           'console_scripts': ['sagemaker=sagemaker.cli.main:main'],
 
@@ -12,31 +12,24 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
 
-from sagemaker import estimator
-from sagemaker.amazon.kmeans import KMeans, KMeansModel, KMeansPredictor
-from sagemaker.amazon.pca import PCA, PCAModel, PCAPredictor
-from sagemaker.amazon.lda import LDA, LDAModel, LDAPredictor
-from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel, LinearLearnerPredictor
-from sagemaker.amazon.factorization_machines import FactorizationMachines, FactorizationMachinesModel
-from sagemaker.amazon.factorization_machines import FactorizationMachinesPredictor
-from sagemaker.amazon.ntm import NTM, NTMModel, NTMPredictor
-from sagemaker.amazon.randomcutforest import RandomCutForest, RandomCutForestModel, RandomCutForestPredictor
+from sagemaker import estimator  # noqa: F401
+from sagemaker.amazon.kmeans import KMeans, KMeansModel, KMeansPredictor  # noqa: F401
+from sagemaker.amazon.pca import PCA, PCAModel, PCAPredictor  # noqa: F401
+from sagemaker.amazon.lda import LDA, LDAModel, LDAPredictor  # noqa: F401
+from sagemaker.amazon.linear_learner import LinearLearner, LinearLearnerModel, LinearLearnerPredictor  # noqa: F401
+from sagemaker.amazon.factorization_machines import FactorizationMachines, FactorizationMachinesModel  # noqa: F401
+from sagemaker.amazon.factorization_machines import FactorizationMachinesPredictor  # noqa: F401
+from sagemaker.amazon.ntm import NTM, NTMModel, NTMPredictor  # noqa: F401
+from sagemaker.amazon.randomcutforest import (RandomCutForest, RandomCutForestModel,  # noqa: F401
+                                              RandomCutForestPredictor)
 
-from sagemaker.local.local_session import LocalSession
+from sagemaker.analytics import TrainingJobAnalytics, HyperparameterTuningJobAnalytics  # noqa: F401
+from sagemaker.local.local_session import LocalSession  # noqa: F401
 
-from sagemaker.model import Model
-from sagemaker.predictor import RealTimePredictor
-from sagemaker.session import Session
-from sagemaker.session import container_def
-from sagemaker.session import production_variant
-from sagemaker.session import s3_input
-from sagemaker.session import get_execution_role
-
-
-__all__ = ['estimator', 'KMeans', 'KMeansModel', 'KMeansPredictor', 'PCA', 'PCAModel', 'PCAPredictor', 'LinearLearner',
-           'LinearLearnerModel', 'LinearLearnerPredictor',
-           'LDA', 'LDAModel', 'LDAPredictor',
-           'FactorizationMachines', 'FactorizationMachinesModel', 'FactorizationMachinesPredictor',
-           'RandomCutForest', 'RandomCutForestModel', 'RandomCutForestPredictor',
-           'Model', 'NTM', 'NTMModel', 'NTMPredictor', 'RealTimePredictor', 'Session', 'LocalSession',
-           'container_def', 's3_input', 'production_variant', 'get_execution_role']
+from sagemaker.model import Model  # noqa: F401
+from sagemaker.predictor import RealTimePredictor  # noqa: F401
+from sagemaker.session import Session  # noqa: F401
+from sagemaker.session import container_def  # noqa: F401
+from sagemaker.session import production_variant  # noqa: F401
+from sagemaker.session import s3_input  # noqa: F401
+from sagemaker.session import get_execution_role  # noqa: F401
@@ -0,0 +1,48 @@
+
+===================================
+AWS SageMaker Estimators and Models
+===================================
+
+Amazon SageMaker provides several built-in machine learning algorithms that you can use for a variety of problem types.
+
+The full list of algorithms is available on the AWS website: https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html
+
+SageMaker Python SDK includes Estimator wrappers for the AWS K-means, Principal Components Analysis(PCA), Linear Learner, Factorization Machines, Latent Dirichlet Allocation(LDA), Neural Topic Model(NTM) and Random Cut Forest algorithms.
+
+Definition and usage
+~~~~~~~~~~~~~~~~~~~~
+Estimators that wrap Amazon's built-in algorithms define algorithm's hyperparameters with defaults. When a default is not possible you need to provide the value during construction, e.g.:
+
+- ``KMeans`` Estimator requires parameter ``k`` to define number of clusters
+- ``PCA`` Estimator requires parameter ``num_components`` to define number of principal components
+
+Interaction is identical to that of any other Estimator. There are additional details about how data is specified.
+
+Input data format
+^^^^^^^^^^^^^^^^^
+Please note that Amazon's built-in algorithms work best with the protobuf ``recordIO`` format.
+The data is expected to be available in an S3 location and, depending on the algorithm, it can handle data in multiple data channels.
+
+This package offers support to prepare data into the required format and upload data to S3.
+Provided class ``RecordSet`` captures necessary details like S3 location, number of records, data channel and is expected as input parameter when calling ``fit()``.
+
+Function ``record_set`` is available on algorithms objects to make it simple to achieve the above.
+It takes 2D numpy array as input, uploads data to S3 and returns ``RecordSet`` objects. By default it uses ``train`` data channel and no labels but can be specified when called.
+
+Please find an example code snippet for illustration:
+
+.. code:: python
+
+    from sagemaker import PCA
+    pca_estimator = PCA(role='SageMakerRole', train_instance_count=1, train_instance_type='ml.m4.xlarge', num_components=3)
+
+    import numpy as np
+    records = pca_estimator.record_set(np.arange(10).reshape(2,5))
+
+    pca_estimator.fit(records)
+
+
+Predictions support
+~~~~~~~~~~~~~~~~~~~
+Calling inference on deployed Amazon built-in algorithms requires a specific input format. By default, this library creates a predictor that lets you use plain numpy data.
+Data is converted so that ``application/x-recordio-protobuf`` input format is used. Received response is deserialized from the protobuf and provided as result from the ``predict`` call.
@@ -19,7 +19,7 @@
 from sagemaker.amazon import validation
 from sagemaker.amazon.hyperparameter import Hyperparameter as hp  # noqa
 from sagemaker.amazon.common import write_numpy_to_dense_tensor
-from sagemaker.estimator import EstimatorBase
+from sagemaker.estimator import EstimatorBase, _TrainingJob
 from sagemaker.session import s3_input
 from sagemaker.utils import sagemaker_timestamp
 
@@ -92,11 +92,38 @@ def _prepare_init_params_from_job_description(cls, job_details):
         del init_params['image']
         return init_params
 
-    def fit(self, records, mini_batch_size=None, **kwargs):
+    def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
+        """Set hyperparameters needed for training.
+
+        Args:
+            * records (:class:`~RecordSet`): The records to train this ``Estimator`` on.
+            * mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
+                default value will be used.
+            * job_name (str): Name of the training job to be created. If not specified, one is generated,
+                using the base name given to the constructor if applicable.
+        """
+        super(AmazonAlgorithmEstimatorBase, self)._prepare_for_training(job_name=job_name)
+
+        feature_dim = None
+
+        if isinstance(records, list):
+            for record in records:
+                if record.channel == 'train':
+                    feature_dim = record.feature_dim
+                    break
+            if feature_dim is None:
+                raise ValueError('Must provide train channel.')
+        else:
+            feature_dim = records.feature_dim
+
+        self.feature_dim = feature_dim
+        self.mini_batch_size = mini_batch_size
+
+    def fit(self, records, mini_batch_size=None, wait=True, logs=True, job_name=None):
         """Fit this Estimator on serialized Record objects, stored in S3.
 
         ``records`` should be an instance of :class:`~RecordSet`. This defines a collection of
-        s3 data files to train this ``Estimator`` on.
+        S3 data files to train this ``Estimator`` on.
 
         Training data is expected to be encoded as dense or sparse vectors in the "values" feature
         on each Record. If the data is labeled, the label is expected to be encoded as a list of
@@ -110,15 +137,19 @@ def fit(self, records, mini_batch_size=None, **kwargs):
 
         Args:
             records (:class:`~RecordSet`): The records to train this ``Estimator`` on
-            mini_batch_size (int or None): The size of each mini-batch to use when training. If None, a
+            mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
                 default value will be used.
+            wait (bool): Whether the call should wait until the job completes (default: True).
+            logs (bool): Whether to show the logs produced by the job.
+                Only meaningful when wait is True (default: True).
+            job_name (str): Training job name. If not specified, the estimator generates a default job name,
+                based on the training image name and current timestamp.
         """
-        self.feature_dim = records.feature_dim
-        self.mini_batch_size = mini_batch_size
+        self._prepare_for_training(records, job_name=job_name, mini_batch_size=mini_batch_size)
 
-        data = {records.channel: s3_input(records.s3_data, distribution='ShardedByS3Key',
-                                          s3_data_type=records.s3_data_type)}
-        super(AmazonAlgorithmEstimatorBase, self).fit(data, **kwargs)
+        self.latest_training_job = _TrainingJob.start_new(self, records)
+        if wait:
+            self.latest_training_job.wait(logs=logs)
 
     def record_set(self, train, labels=None, channel="train"):
         """Build a :class:`~RecordSet` from a numpy :class:`~ndarray` matrix and label vector.
@@ -180,6 +211,14 @@ def __repr__(self):
         """Return an unambiguous representation of this RecordSet"""
         return str((RecordSet, self.__dict__))
 
+    def data_channel(self):
+        """Return a dictionary to represent the training data in a channel for use with ``fit()``"""
+        return {self.channel: self.records_s3_input()}
+
+    def records_s3_input(self):
+        """Return a s3_input to represent the training data"""
+        return s3_input(self.s3_data, distribution='ShardedByS3Key', s3_data_type=self.s3_data_type)
+
 
 def _build_shards(num_shards, array):
     if num_shards < 1:
 
@@ -46,7 +46,6 @@ def validate(self, value):
                 raise ValueError(error_message)
 
     def __get__(self, obj, objtype):
-        """Return the value of this hyperparameter"""
         if '_hyperparameters' not in dir(obj) or self.name not in obj._hyperparameters:
             raise AttributeError()
         return obj._hyperparameters[self.name]
 
@@ -108,8 +108,8 @@ def create_model(self):
         s3 model data produced by this Estimator."""
         return KMeansModel(self.model_data, self.role, self.sagemaker_session)
 
-    def fit(self, records, mini_batch_size=5000, **kwargs):
-        super(KMeans, self).fit(records, mini_batch_size, **kwargs)
+    def _prepare_for_training(self, records, mini_batch_size=5000, job_name=None):
+        super(KMeans, self)._prepare_for_training(records, mini_batch_size=mini_batch_size, job_name=job_name)
 
     def hyperparameters(self):
         """Return the SageMaker hyperparameters for training this KMeans Estimator"""
 
@@ -78,8 +78,10 @@ def __init__(self, role, train_instance_type, num_topics,
             tol (float): Optional. Target error tolerance for the ALS phase of the algorithm.
             **kwargs: base class keyword argument values.
         """
-
         # this algorithm only supports single instance training
+        if kwargs.pop('train_instance_count', 1) != 1:
+            print('LDA only supports single instance training. Defaulting to 1 {}.'.format(train_instance_type))
+
         super(LDA, self).__init__(role, 1, train_instance_type, **kwargs)
         self.num_topics = num_topics
         self.alpha0 = alpha0
@@ -93,11 +95,12 @@ def create_model(self):
 
         return LDAModel(self.model_data, self.role, sagemaker_session=self.sagemaker_session)
 
-    def fit(self, records, mini_batch_size, **kwargs):
+    def _prepare_for_training(self, records, mini_batch_size, job_name=None):
         # mini_batch_size is required, prevent explicit calls with None
         if mini_batch_size is None:
             raise ValueError("mini_batch_size must be set")
-        super(LDA, self).fit(records, mini_batch_size, **kwargs)
+
+        super(LDA, self)._prepare_for_training(records, mini_batch_size=mini_batch_size, job_name=job_name)
 
 
 class LDAPredictor(RealTimePredictor):
 
@@ -228,12 +228,23 @@ def create_model(self):
 
         return LinearLearnerModel(self.model_data, self.role, self.sagemaker_session)
 
-    def fit(self, records, mini_batch_size=None, **kwargs):
+    def _prepare_for_training(self, records, mini_batch_size=None, job_name=None):
+        num_records = None
+        if isinstance(records, list):
+            for record in records:
+                if record.channel == 'train':
+                    num_records = record.num_records
+                    break
+            if num_records is None:
+                raise ValueError('Must provide train channel.')
+        else:
+            num_records = records.num_records
+
         # mini_batch_size can't be greater than number of records or training job fails
         default_mini_batch_size = min(self.DEFAULT_MINI_BATCH_SIZE,
-                                      max(1, int(records.num_records / self.train_instance_count)))
-        use_mini_batch_size = mini_batch_size or default_mini_batch_size
-        super(LinearLearner, self).fit(records, use_mini_batch_size, **kwargs)
+                                      max(1, int(num_records / self.train_instance_count)))
+        mini_batch_size = mini_batch_size or default_mini_batch_size
+        super(LinearLearner, self)._prepare_for_training(records, mini_batch_size=mini_batch_size, job_name=job_name)
 
 
 class LinearLearnerPredictor(RealTimePredictor):
 
@@ -113,10 +113,10 @@ def create_model(self):
 
         return NTMModel(self.model_data, self.role, sagemaker_session=self.sagemaker_session)
 
-    def fit(self, records, mini_batch_size=None, **kwargs):
+    def _prepare_for_training(self, records, mini_batch_size, job_name=None):
         if mini_batch_size is not None and (mini_batch_size < 1 or mini_batch_size > 10000):
             raise ValueError("mini_batch_size must be in [1, 10000]")
-        super(NTM, self).fit(records, mini_batch_size, **kwargs)
+        super(NTM, self)._prepare_for_training(records, mini_batch_size=mini_batch_size, job_name=job_name)
 
 
 class NTMPredictor(RealTimePredictor):
-Original file line number
+Diff line change
 venv/
 *~
 .pytest_cache/
 +*.swp