aws
diff --git a/src/sagemaker/config/config.py b/src/sagemaker/config/config.py
Lines changed: 14 additions & 4 deletions
diff --git a/src/sagemaker/config/config_utils.py b/src/sagemaker/config/config_utils.py
Lines changed: 32 additions & 0 deletions
diff --git a/src/sagemaker/jumpstart/factory/estimator.py b/src/sagemaker/jumpstart/factory/estimator.py
Lines changed: 1 addition & 0 deletions
diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py
Lines changed: 7 additions & 6 deletions
diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py
Lines changed: 10 additions & 8 deletions
diff --git a/src/sagemaker/remote_function/job.py b/src/sagemaker/remote_function/job.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
Lines changed: 16 additions & 10 deletions
diff --git a/tests/integ/sagemaker/remote_function/test_decorator.py b/tests/integ/sagemaker/remote_function/test_decorator.py
Lines changed: 2 additions & 2 deletions
@@ -28,9 +28,10 @@
 from botocore.utils import merge_dicts
 from six.moves.urllib.parse import urlparse
 from sagemaker.config.config_schema import SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA
-from sagemaker.config.config_utils import get_sagemaker_config_logger
+from sagemaker.config.config_utils import non_repeating_log_factory, get_sagemaker_config_logger
 
 logger = get_sagemaker_config_logger()
+log_info_function = non_repeating_log_factory(logger, "info")
 
 _APP_NAME = "sagemaker"
 # The default name of the config file.
@@ -52,7 +53,9 @@
 S3_PREFIX = "s3://"
 
 
-def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource=None) -> dict:
+def load_sagemaker_config(
+    additional_config_paths: List[str] = None, s3_resource=None, repeat_log=False
+) -> dict:
     """Loads config files and merges them.
 
     By default, this method first searches for config files in the default locations
@@ -99,6 +102,8 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource
             <https://boto3.amazonaws.com/v1/documentation/api\
             /latest/reference/core/session.html#boto3.session.Session.resource>`__.
             This argument is not needed if the config files are present in the local file system.
+        repeat_log (bool): Whether to emit a log message again when it repeats an earlier one.
+            Defaults to ``False``.
     """
     default_config_path = os.getenv(
         ENV_VARIABLE_ADMIN_CONFIG_OVERRIDE, _DEFAULT_ADMIN_CONFIG_FILE_PATH
@@ -109,6 +114,11 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource
         config_paths += additional_config_paths
     config_paths = list(filter(lambda item: item is not None, config_paths))
     merged_config = {}
+
+    log_info = log_info_function
+    if repeat_log:
+        log_info = logger.info
+
     for file_path in config_paths:
         config_from_file = {}
         if file_path.startswith(S3_PREFIX):
@@ -130,9 +140,9 @@ def load_sagemaker_config(additional_config_paths: List[str] = None, s3_resource
         if config_from_file:
             validate_sagemaker_config(config_from_file)
             merge_dicts(merged_config, config_from_file)
-            logger.info("Fetched defaults config from location: %s", file_path)
+            log_info("Fetched defaults config from location: %s", file_path)
         else:
-            logger.info("Not applying SDK defaults from location: %s", file_path)
+            log_info("Not applying SDK defaults from location: %s", file_path)
 
     return merged_config
 
 
@@ -15,9 +15,11 @@
 These utils may be used inside or outside the config module.
 """
 from __future__ import absolute_import
+from collections import deque
 
 import logging
 import sys
+from typing import Callable
 
 
 def get_sagemaker_config_logger():
@@ -197,3 +199,33 @@ def _log_sagemaker_config_merge(
     else:
         # nothing was specified in the config and nothing is being automatically applied
         logger.debug("Skipped value because no value defined\n  config key = %s", config_key_path)
+
+
+def non_repeating_log_factory(logger: logging.Logger, method: str, cache_size=100) -> Callable:
+    """Create log function that filters the repeated messages.
+
+    By default, it only keeps track of the last 100 messages; if a repeated
+    message arrives after ``cache_size`` other messages, it will be displayed again.
+
+    Args:
+        logger (logging.Logger): the logger to be used to dispatch the message.
+        method (str): the log method, can be info, warning or debug.
+        cache_size (int): the number of most recent log messages to keep in the cache.
+            Defaults to 100.
+
+    Returns:
+        (Callable): the new log method
+    """
+    if method not in ["info", "warning", "debug"]:
+        raise ValueError("Not supported logging method.")
+
+    _caches = deque(maxlen=cache_size)
+    log_method = getattr(logger, method)
+
+    def new_log_method(msg, *args, **kwargs):
+        key = f"{msg}:{args}"
+        if key not in _caches:
+            log_method(msg, *args, **kwargs)
+            _caches.append(key)
+
+    return new_log_method
@@ -338,6 +338,7 @@ def get_deploy_kwargs(
         tolerate_vulnerable_model=tolerate_vulnerable_model,
         tolerate_deprecated_model=tolerate_deprecated_model,
         training_instance_type=training_instance_type,
+        disable_instance_type_logging=True,
     )
 
     estimator_deploy_kwargs: JumpStartEstimatorDeployKwargs = JumpStartEstimatorDeployKwargs(
 
@@ -171,7 +171,9 @@ def _add_vulnerable_and_deprecated_status_to_kwargs(
     return kwargs
 
 
-def _add_instance_type_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartModelInitKwargs:
+def _add_instance_type_to_kwargs(
+    kwargs: JumpStartModelInitKwargs, disable_instance_type_logging: bool = False
+) -> JumpStartModelInitKwargs:
     """Sets instance type based on default or override, returns full kwargs."""
 
     orig_instance_type = kwargs.instance_type
@@ -187,7 +189,7 @@ def _add_instance_type_to_kwargs(kwargs: JumpStartModelInitKwargs) -> JumpStartM
         training_instance_type=kwargs.training_instance_type,
     )
 
-    if orig_instance_type is None:
+    if not disable_instance_type_logging and orig_instance_type is None:
         JUMPSTART_LOGGER.info(
             "No instance type selected for inference hosting endpoint. Defaulting to %s.",
             kwargs.instance_type,
@@ -551,9 +553,7 @@ def get_deploy_kwargs(
 
     deploy_kwargs = _add_endpoint_name_to_kwargs(kwargs=deploy_kwargs)
 
-    deploy_kwargs = _add_instance_type_to_kwargs(
-        kwargs=deploy_kwargs,
-    )
+    deploy_kwargs = _add_instance_type_to_kwargs(kwargs=deploy_kwargs)
 
     deploy_kwargs.initial_instance_count = initial_instance_count or 1
 
@@ -677,6 +677,7 @@ def get_init_kwargs(
     git_config: Optional[Dict[str, str]] = None,
     model_package_arn: Optional[str] = None,
     training_instance_type: Optional[str] = None,
+    disable_instance_type_logging: bool = False,
     resources: Optional[ResourceRequirements] = None,
 ) -> JumpStartModelInitKwargs:
     """Returns kwargs required to instantiate `sagemaker.estimator.Model` object."""
@@ -720,7 +721,7 @@ def get_init_kwargs(
     model_init_kwargs = _add_model_name_to_kwargs(kwargs=model_init_kwargs)
 
     model_init_kwargs = _add_instance_type_to_kwargs(
-        kwargs=model_init_kwargs,
+        kwargs=model_init_kwargs, disable_instance_type_logging=disable_instance_type_logging
     )
 
     model_init_kwargs = _add_image_uri_to_kwargs(kwargs=model_init_kwargs)
 
@@ -866,16 +866,10 @@ def _create_sagemaker_model(
                 # _base_name, model_name are not needed under PipelineSession.
                 # the model_data may be Pipeline variable
                 # which may break the _base_name generation
-                model_uri = None
-                if isinstance(self.model_data, (str, PipelineVariable)):
-                    model_uri = self.model_data
-                elif isinstance(self.model_data, dict):
-                    model_uri = self.model_data.get("S3DataSource", {}).get("S3Uri", None)
-
                 self._ensure_base_name_if_needed(
                     image_uri=container_def["Image"],
                     script_uri=self.source_dir,
-                    model_uri=model_uri,
+                    model_uri=self._get_model_uri(),
                 )
                 self._set_model_name_if_needed()
 
@@ -912,6 +906,14 @@ def _create_sagemaker_model(
             )
             self.sagemaker_session.create_model(**create_model_args)
 
+    def _get_model_uri(self):
+        model_uri = None
+        if isinstance(self.model_data, (str, PipelineVariable)):
+            model_uri = self.model_data
+        elif isinstance(self.model_data, dict):
+            model_uri = self.model_data.get("S3DataSource", {}).get("S3Uri", None)
+        return model_uri
+
     def _ensure_base_name_if_needed(self, image_uri, script_uri, model_uri):
         """Create a base name from the image URI if there is no model name provided.
 
@@ -1496,7 +1498,7 @@ def deploy(
             self._ensure_base_name_if_needed(
                 image_uri=self.image_uri,
                 script_uri=self.source_dir,
-                model_uri=self.model_data,
+                model_uri=self._get_model_uri(),
             )
             if self._base_name is not None:
                 self._base_name = "-".join((self._base_name, compiled_model_suffix))
 
@@ -891,7 +891,7 @@ def wait(self, timeout: int = None):
         """
 
         self._last_describe_response = _logs_for_job(
-            boto_session=self.sagemaker_session.boto_session,
+            sagemaker_session=self.sagemaker_session,
             job_name=self.job_name,
             wait=True,
             timeout=timeout,
 
@@ -472,6 +472,7 @@ def download_data(self, path, bucket, key_prefix="", extra_args=None):
 
         # Initialize the variables used to loop through the contents of the S3 bucket.
         keys = []
+        directories = []
         next_token = ""
         base_parameters = {"Bucket": bucket, "Prefix": key_prefix}
 
@@ -490,20 +491,26 @@ def download_data(self, path, bucket, key_prefix="", extra_args=None):
                 return []
             # For each object, save its key or directory.
             for s3_object in contents:
-                key = s3_object.get("Key")
-                keys.append(key)
+                key: str = s3_object.get("Key")
+                obj_size = s3_object.get("Size")
+                if key.endswith("/") and int(obj_size) == 0:
+                    directories.append(os.path.join(path, key))
+                else:
+                    keys.append(key)
             next_token = response.get("NextContinuationToken")
 
         # For each object key, create the directory on the local machine if needed, and then
         # download the file.
         downloaded_paths = []
+        for dir_path in directories:
+            os.makedirs(os.path.dirname(dir_path), exist_ok=True)
         for key in keys:
             tail_s3_uri_path = os.path.basename(key)
             if not os.path.splitext(key_prefix)[1]:
                 tail_s3_uri_path = os.path.relpath(key, key_prefix)
             destination_path = os.path.join(path, tail_s3_uri_path)
             if not os.path.exists(os.path.dirname(destination_path)):
-                os.makedirs(os.path.dirname(destination_path))
+                os.makedirs(os.path.dirname(destination_path), exist_ok=True)
             s3.download_file(
                 Bucket=bucket, Key=key, Filename=destination_path, ExtraArgs=extra_args
             )
@@ -5495,7 +5502,7 @@ def logs_for_job(self, job_name, wait=False, poll=10, log_type="All", timeout=No
             exceptions.CapacityError: If the training job fails with CapacityError.
             exceptions.UnexpectedStatusException: If waiting and the training job fails.
         """
-        _logs_for_job(self.boto_session, job_name, wait, poll, log_type, timeout)
+        _logs_for_job(self, job_name, wait, poll, log_type, timeout)
 
     def logs_for_processing_job(self, job_name, wait=False, poll=10):
         """Display logs for a given processing job, optionally tailing them until the job is complete.
@@ -7378,17 +7385,16 @@ def _rule_statuses_changed(current_statuses, last_statuses):
 
 
 def _logs_for_job(  # noqa: C901 - suppress complexity warning for this method
-    boto_session, job_name, wait=False, poll=10, log_type="All", timeout=None
+    sagemaker_session, job_name, wait=False, poll=10, log_type="All", timeout=None
 ):
     """Display logs for a given training job, optionally tailing them until job is complete.
 
     If the output is a tty or a Jupyter cell, it will be color-coded
     based on which instance the log entry is from.
 
     Args:
-        boto_session (boto3.session.Session): The underlying Boto3 session which AWS service
-                calls are delegated to (default: None). If not provided, one is created with
-                default AWS configuration chain.
+        sagemaker_session (sagemaker.session.Session): A SageMaker Session
+            object, used for SageMaker interactions.
         job_name (str): Name of the training job to display the logs for.
         wait (bool): Whether to keep looking for new log entries until the job completes
             (default: False).
@@ -7405,13 +7411,13 @@ def _logs_for_job(  # noqa: C901 - suppress complexity warning for this method
         exceptions.CapacityError: If the training job fails with CapacityError.
         exceptions.UnexpectedStatusException: If waiting and the training job fails.
     """
-    sagemaker_client = boto_session.client("sagemaker")
+    sagemaker_client = sagemaker_session.sagemaker_client
     request_end_time = time.time() + timeout if timeout else None
     description = sagemaker_client.describe_training_job(TrainingJobName=job_name)
     print(secondary_training_status_message(description, None), end="")
 
     instance_count, stream_names, positions, client, log_group, dot, color_wrap = _logs_init(
-        boto_session, description, job="Training"
+        sagemaker_session.boto_session, description, job="Training"
     )
 
     state = _get_initial_job_state(description, "TrainingJobStatus", wait)
 
@@ -207,7 +207,7 @@ def test_with_additional_dependencies(
     def cuberoot(x):
         from scipy.special import cbrt
 
-        return cbrt(27)
+        return cbrt(x)
 
     assert cuberoot(27) == 3
 
@@ -742,7 +742,7 @@ def test_with_user_and_workdir_set_in_the_image(
     def cuberoot(x):
         from scipy.special import cbrt
 
-        return cbrt(27)
+        return cbrt(x)
 
     assert cuberoot(27) == 3
Original file line number	Diff line number	Diff line change
`@@ -338,6 +338,7 @@ def get_deploy_kwargs(`
`338`	`338`	`tolerate_vulnerable_model=tolerate_vulnerable_model,`
`339`	`339`	`tolerate_deprecated_model=tolerate_deprecated_model,`
`340`	`340`	`training_instance_type=training_instance_type,`
	`341`	`+ disable_instance_type_logging=True,`
`341`	`342`	`)`
`342`	`343`
`343`	`344`	`estimator_deploy_kwargs: JumpStartEstimatorDeployKwargs = JumpStartEstimatorDeployKwargs(`