Skip to content

change: enforce some docstyle conventions #1886

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .pydocstylerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[pydocstyle]
inherit = false
ignore = D104,D107,D202,D203,D205,D209,D212,D213,D214,D400,D401,D404,D406,D407,D411,D413,D414,D415,D417
match = (?!record_pb2).*\.py
4 changes: 2 additions & 2 deletions src/sagemaker/amazon/randomcutforest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def __init__(
eval_metrics=None,
**kwargs
):
"""RandomCutForest is :class:`Estimator` used for anomaly detection.
"""An `Estimator` class implementing a Random Cut Forest.

This Estimator may be fit via calls to
Typically used for anomaly detection, this Estimator may be fit via calls to
:meth:`~sagemaker.amazon.amazon_estimator.AmazonAlgorithmEstimatorBase.fit`.
It requires Amazon :class:`~sagemaker.amazon.record_pb2.Record` protobuf
serialized data to be stored in S3. There is a utility
Expand Down
39 changes: 25 additions & 14 deletions src/sagemaker/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def name(self):
return self._tuning_job_name

def __repr__(self):
"""Human-readable representation override."""
return "<sagemaker.HyperparameterTuningJobAnalytics for %s>" % self.name

def clear_cache(self):
Expand All @@ -117,9 +118,10 @@ def clear_cache(self):
self._training_job_summaries = None

def _fetch_dataframe(self):
"""Return a pandas dataframe with all the training jobs, along with
their hyperparameters, results, and metadata. This also includes a
column to indicate if a training job was the best seen so far.
"""Return a pandas dataframe with all the training jobs.

This includes their hyperparameters, results, and metadata, as well as
a column to indicate if a training job was the best seen so far.
"""

def reshape(training_summary):
Expand Down Expand Up @@ -320,20 +322,27 @@ def name(self):
return self._training_job_name

def __repr__(self):
"""The human-readable representation override."""
return "<sagemaker.TrainingJobAnalytics for %s>" % self.name

def clear_cache(self):
"""Clear the object of all local caches of API methods, so that the next
time any properties are accessed they will be refreshed from the
service.
"""Clear the object of all local caches of API methods.

This is so that the next time any properties are accessed they will be
refreshed from the service.
"""
super(TrainingJobAnalytics, self).clear_cache()
self._data = defaultdict(list)
self._time_interval = self._determine_timeinterval()

def _determine_timeinterval(self):
"""Return a dictionary with two datetime objects, start_time and
end_time, covering the interval of the training job
"""Return a dict with two datetime objects.

The dict includes the `start_time` and `end_time`, covering the interval
of the training job.

Returns:
a dict with the `start_time` and `end_time`.
"""
description = self._sage_client.describe_training_job(TrainingJobName=self.name)
start_time = self._start_time or description[u"TrainingStartTime"] # datetime object
Expand All @@ -359,7 +368,7 @@ def _fetch_metric(self, metric_name):
"""Fetch all the values of a named metric, and add them to _data

Args:
metric_name:
metric_name: The metric name to fetch.
"""
request = {
"Namespace": self.CLOUDWATCH_NAMESPACE,
Expand Down Expand Up @@ -389,13 +398,14 @@ def _fetch_metric(self, metric_name):
self._add_single_metric(elapsed_seconds, metric_name, value)

def _add_single_metric(self, timestamp, metric_name, value):
"""Store a single metric in the _data dict which can be converted to a
dataframe.
"""Store a single metric in the _data dict.

This can be converted to a dataframe.

Args:
timestamp:
metric_name:
value:
timestamp: The timestamp of the metric.
metric_name: The name of the metric.
value: The value of the metric.
"""
# note that this method is built this way to make it possible to
# support live-refreshing charts in Bokeh at some point in the future.
Expand Down Expand Up @@ -480,6 +490,7 @@ def name(self):
return self._experiment_name

def __repr__(self):
"""The human-readable representation override."""
return "<sagemaker.ExperimentAnalytics for %s>" % self.name

def clear_cache(self):
Expand Down
11 changes: 4 additions & 7 deletions src/sagemaker/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def best_candidate(self, job_name=None):
Args:
job_name (str): The name of the AutoML job. If None, will use object's
_current_auto_ml_job_name.

Returns:
dict: a dictionary with information of the best candidate
"""
Expand Down Expand Up @@ -226,6 +227,7 @@ def list_candidates(
Default to None.
max_results (int): The number of candidates will be listed in results,
between 1 to 100. Default to None. If None, will return all the candidates.

Returns:
list: A list of dictionaries with candidates information
"""
Expand Down Expand Up @@ -288,7 +290,6 @@ def create_model(

Returns:
PipelineModel object

"""
sagemaker_session = sagemaker_session or self.sagemaker_session

Expand Down Expand Up @@ -434,7 +435,6 @@ def _check_problem_type_and_job_objective(self, problem_type, job_objective):

Raises (ValueError): raises ValueError if one of problem_type and job_objective is provided
while the other is None.

"""
if not (problem_type and job_objective) and (problem_type or job_objective):
raise ValueError(
Expand Down Expand Up @@ -500,7 +500,6 @@ def _check_inference_keys(cls, inference_response_keys, containers):
Raises:
ValueError, if one or more keys in inference_response_keys are not supported
by the inference pipeline.

"""
if not inference_response_keys:
return
Expand Down Expand Up @@ -537,7 +536,6 @@ def validate_and_update_inference_response(cls, inference_containers, inference_

Raises:
ValueError: if one or more of inference_response_keys are unsupported by the model

"""
if not inference_response_keys:
return
Expand Down Expand Up @@ -704,12 +702,11 @@ def _format_inputs_to_input_config(
Args:
inputs (str, list[str]): local path(s) or S3 uri(s) of input datasets.
validate_uri (bool): indicates whether it is needed to validate S3 uri.
compression (str):
compression (str): Compression type of the input data.
target_attribute_name (str): the target attribute name for classification
or regression.

Returns (dict): a dict of AutoML InputDataConfig

"""
if inputs is None:
return None
Expand Down Expand Up @@ -758,7 +755,6 @@ def _prepare_auto_ml_stop_condition(
total_job_runtime_in_seconds (int): the total wait time of an AutoML job.

Returns (dict): an AutoML CompletionCriteria.

"""
stopping_condition = {"MaxCandidates": max_candidates}

Expand All @@ -777,6 +773,7 @@ def describe(self):

def wait(self, logs=True):
"""Wait for the AutoML job to finish.

Args:
logs (bool): indicate whether to output logs.
"""
Expand Down
2 changes: 1 addition & 1 deletion src/sagemaker/cli/compatibility/v2/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def _update_code_from_cell(self, cell):
return self._code_str_to_source_list(updated_code)

def _code_str_to_source_list(self, code):
"""Converts a string of code into a list for a Jupyter notebook code cell.
r"""Converts a string of code into a list for a Jupyter notebook code cell.

Args:
code (str): Code to be converted.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def modify_node(self, node):


def _rename_namespace(node, name):
"""Rename namespace ``session`` to ``inputs`` """
"""Rename namespace ``session`` to ``inputs``."""
if isinstance(node.func.value, ast.Attribute) and node.func.value.attr == name:
node.func.value.attr = "inputs"
elif isinstance(node.func.value, ast.Name) and node.func.value.id == name:
Expand Down
11 changes: 11 additions & 0 deletions src/sagemaker/debugger.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,11 @@ def __init__(self, name, parameters=None):
self.parameters = parameters

def __eq__(self, other):
"""Equals method override.

Args:
other: Object to test equality against.
"""
if not isinstance(other, CollectionConfig):
raise TypeError(
"CollectionConfig is only comparable with other CollectionConfig objects."
Expand All @@ -373,6 +378,11 @@ def __eq__(self, other):
return self.name == other.name and self.parameters == other.parameters

def __ne__(self, other):
"""Not-equals method override.

Args:
other: Object to test equality against.
"""
if not isinstance(other, CollectionConfig):
raise TypeError(
"CollectionConfig is only comparable with other CollectionConfig objects."
Expand All @@ -381,6 +391,7 @@ def __ne__(self, other):
return self.name != other.name or self.parameters != other.parameters

def __hash__(self):
"""Hash method override."""
return hash((self.name, tuple(sorted((self.parameters or {}).items()))))

def _to_request_dict(self):
Expand Down
11 changes: 5 additions & 6 deletions src/sagemaker/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1489,8 +1489,9 @@ def __init__(
enable_sagemaker_metrics=None,
**kwargs
):
"""Base class initializer. Subclasses which override ``__init__`` should
invoke ``super()``
"""Base class initializer.

Subclasses which override ``__init__`` should invoke ``super()``.

Args:
entry_point (str): Path (absolute or relative) to the local Python
Expand All @@ -1499,8 +1500,8 @@ def __init__(
must point to a file located at the root of ``source_dir``.
If 'git_config' is provided, 'entry_point' should be
a relative location to the Python source file in the Git repo.
Example:

Example:
With the following GitHub repo directory structure:

>>> |----- README.md
Expand Down Expand Up @@ -1729,9 +1730,7 @@ def _prepare_for_training(self, job_name=None):
self._validate_and_set_debugger_configs()

def _validate_and_set_debugger_configs(self):
"""
Set defaults for debugging
"""
"""Set defaults for debugging."""
if self.debugger_hook_config is None and _region_supports_debugger(
self.sagemaker_session.boto_region_name
):
Expand Down
38 changes: 24 additions & 14 deletions src/sagemaker/git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@


def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None):
"""Git clone repo containing the training code and serving code. This method
also validate ``git_config``, and set ``entry_point``, ``source_dir`` and
``dependencies`` to the right file or directory in the repo cloned.
"""Git clone repo containing the training code and serving code.

This method also validates ``git_config``, and sets ``entry_point``,
``source_dir`` and ``dependencies`` to the right file or directory in the
cloned repo.

Args:
git_config (dict[str, str]): Git configurations used for cloning files,
Expand Down Expand Up @@ -118,9 +120,13 @@ def git_clone_repo(git_config, entry_point, source_dir=None, dependencies=None):


def _validate_git_config(git_config):
"""
"""Validates the git configuration.

Checks all configuration values except 2FA_enabled are string types. The
2FA_enabled configuration should be a boolean.

Args:
git_config:
git_config: The configuration to validate.
"""
if "repo" not in git_config:
raise ValueError("Please provide a repo for git_config.")
Expand All @@ -133,8 +139,9 @@ def _validate_git_config(git_config):


def _generate_and_run_clone_command(git_config, dest_dir):
"""check if a git_config param is valid, if it is, create the command to git
clone the repo, and run it.
"""Check if a git_config param is valid.

If it is valid, create the command to git clone the repo, and run it.

Args:
git_config ((dict[str, str]): Git configurations used for cloning files,
Expand All @@ -153,8 +160,9 @@ def _generate_and_run_clone_command(git_config, dest_dir):


def _clone_command_for_github_like(git_config, dest_dir):
"""check if a git_config param representing a GitHub (or like) repo is
valid, if it is, create the command to git clone the repo, and run it.
"""Check if a git_config param representing a GitHub (or like) repo is valid.

If it is valid, create the command to git clone the repo, and run it.

Args:
git_config ((dict[str, str]): Git configurations used for cloning files,
Expand Down Expand Up @@ -223,8 +231,9 @@ def _clone_command_for_github_like_https_2fa_enabled(git_config, dest_dir):


def _clone_command_for_codecommit(git_config, dest_dir):
"""check if a git_config param representing a CodeCommit repo is valid, if
it is, create the command to git clone the repo, and run it.
"""Check if a git_config param representing a CodeCommit repo is valid.

If it is, create the command to git clone the repo, and run it.

Args:
git_config ((dict[str, str]): Git configurations used for cloning files,
Expand All @@ -250,10 +259,11 @@ def _clone_command_for_codecommit(git_config, dest_dir):


def _clone_command_for_codecommit_https(git_config, dest_dir):
"""
"""Invoke the clone command for codecommit.

Args:
git_config:
dest_dir:
git_config: The git configuration.
dest_dir: The destination directory for the clone.
"""
updated_url = git_config["repo"]
if "username" in git_config and "password" in git_config:
Expand Down
17 changes: 9 additions & 8 deletions src/sagemaker/local/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def split(self, filename):
filename (str): path to the file to split

Returns: generator for the individual records that were split from
the file
the file
"""
with open(filename, "rb") as f:
buf = f.read()
Expand All @@ -246,15 +246,15 @@ def split(self, filename):
yield buf

def _is_binary(self, buf):
"""binary check.
Check whether `buf` contains binary data.
Returns true if `buf` contains any non-utf-8 characters.
"""Check whether `buf` contains binary data.

Returns True if `buf` contains any non-utf-8 characters.

Args:
buf (bytes): data to inspect
buf (bytes): data to inspect

Returns:
True if data is binary, otherwise False
True if data is binary, otherwise False
"""
return bool(buf.translate(None, self._textchars))

Expand Down Expand Up @@ -398,8 +398,9 @@ def _payload_size_within_limit(payload, size):


def _validate_payload_size(payload, size):
"""Check if a payload is within the size in MB threshold. Raise an exception
otherwise.
"""Check if a payload is within the size in MB threshold.

Raise an exception if the payload is beyond the size in MB threshold.

Args:
payload: data that will be checked
Expand Down
Loading