Allow Local Mode to work with a local training script. #178
Changes from 2 commits
sagemaker/estimator.py
@@ -14,13 +14,12 @@

import json
import logging
import os
from abc import ABCMeta
from abc import abstractmethod
from six import with_metaclass, string_types

from sagemaker.fw_utils import tar_and_upload_dir
from sagemaker.fw_utils import parse_s3_url
from sagemaker.fw_utils import UploadedCode
from sagemaker.fw_utils import tar_and_upload_dir, parse_s3_url, UploadedCode, validate_source_dir
from sagemaker.local.local_session import LocalSession, file_input

from sagemaker.model import Model
@@ -30,7 +29,7 @@

from sagemaker.predictor import RealTimePredictor
from sagemaker.session import Session
from sagemaker.session import s3_input
from sagemaker.utils import base_name_from_image, name_from_base
from sagemaker.utils import base_name_from_image, name_from_base, get_config_value


class EstimatorBase(with_metaclass(ABCMeta, object)):
@@ -83,13 +82,10 @@ def __init__(self, role, train_instance_count, train_instance_type,
        self.input_mode = input_mode

        if self.train_instance_type in ('local', 'local_gpu'):
            self.local_mode = True
            if self.train_instance_type == 'local_gpu' and self.train_instance_count > 1:
                raise RuntimeError("Distributed Training in Local GPU is not supported")

            self.sagemaker_session = sagemaker_session or LocalSession()
        else:
            self.local_mode = False
            self.sagemaker_session = sagemaker_session or Session()

        self.base_job_name = base_job_name
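As context for the branch above, here is a rough usage sketch (not part of this diff; the MXNet estimator and its arguments are only illustrative): picking 'local' or 'local_gpu' as the instance type makes the estimator use a LocalSession, and asking for more than one 'local_gpu' instance hits the RuntimeError shown here.

```python
# Illustrative sketch only -- estimator class and arguments are placeholders.
from sagemaker.mxnet import MXNet

# Runs in Local Mode (Docker on this machine) via a LocalSession.
local_estimator = MXNet(entry_point='train.py',
                        role='SageMakerRole',
                        train_instance_count=1,
                        train_instance_type='local')

# Asking for distributed local GPU training raises the RuntimeError above:
# MXNet(entry_point='train.py', role='SageMakerRole',
#       train_instance_count=2, train_instance_type='local_gpu')
```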
@@ -158,9 +154,14 @@ def fit(self, inputs, wait=True, logs=True, job_name=None):
        base_name = self.base_job_name or base_name_from_image(self.train_image())
        self._current_job_name = name_from_base(base_name)

        # if output_path was specified we use it otherwise initialize here
        # if output_path was specified we use it otherwise initialize here.
        # For Local Mode with no_internet=True we don't need an explicit output_path
        if self.output_path is None:
            self.output_path = 's3://{}/'.format(self.sagemaker_session.default_bucket())
            no_internet = get_config_value('local.no_internet', self.sagemaker_session.config)
            if self.sagemaker_session.local_mode and no_internet:
                self.output_path = ''
            else:
                self.output_path = 's3://{}/'.format(self.sagemaker_session.default_bucket())

        self.latest_training_job = _TrainingJob.start_new(self, inputs)
        if wait:

Review comment (on the local_mode/no_internet check): If we start throwing more configuration into local_mode, then we should have a local_mode configuration object in place of the local_mode boolean variable. I think this is okay now, but once we get one more piece of config, let's refactor this.

Reply: sounds good.
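To make the refactor suggested in the comment concrete, here is a rough sketch of what a grouped local-mode configuration could look like; the class and attribute names below are hypothetical and not part of this PR.

```python
# Hypothetical sketch only -- names are invented, not part of this change.
class LocalModeConfig(object):
    """Groups local-mode settings instead of a boolean plus ad-hoc config keys."""

    def __init__(self, enabled=False, no_internet=False):
        self.enabled = enabled          # would replace the local_mode boolean
        self.no_internet = no_internet  # would replace get_config_value('local.no_internet', ...)

# The check in fit() could then read:
#     if self.sagemaker_session.local_config.enabled and self.sagemaker_session.local_config.no_internet:
#         self.output_path = ''
```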
@@ -323,7 +324,7 @@ def start_new(cls, estimator, inputs):
            sagemaker.estimator.Framework: Constructed object that captures all information about the started job.
        """

        local_mode = estimator.local_mode
        local_mode = estimator.sagemaker_session.local_mode

        # Allow file:// input only in local mode
        if isinstance(inputs, str) and inputs.startswith('file://'):
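As a usage note rather than part of the diff: file:// channels are only accepted when the session is in local mode. Reusing the hypothetical local_estimator from the earlier sketch, a call might look like this.

```python
# Only valid in local mode; outside local mode a file:// channel is rejected.
local_estimator.fit({'train': 'file:///tmp/my_training_data'})

# A SageMaker-hosted job would use an S3 channel instead:
# estimator.fit({'train': 's3://my-bucket/my-training-data'})
```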
@@ -604,27 +605,54 @@ def fit(self, inputs, wait=True, logs=True, job_name=None):
        base_name = self.base_job_name or base_name_from_image(self.train_image())
        self._current_job_name = name_from_base(base_name)

        # validate source dir will raise a ValueError if there is something wrong with the
        # source directory. We are intentionally not handling it because this is a critical error.
        if self.source_dir and not self.source_dir.lower().startswith('s3://'):
            validate_source_dir(self.entry_point, self.source_dir)

        # if we are in local mode with no_internet=True. We want the container to just
        # mount the source dir instead of uploading to S3.
        no_internet = get_config_value('local.no_internet', self.sagemaker_session.config)
        if self.sagemaker_session.local_mode and no_internet:
            # if there is no source dir, use the directory containing the entry point.
            if self.source_dir is None:
                self.source_dir = os.path.dirname(self.entry_point)
            self.entry_point = os.path.basename(self.entry_point)

            code_dir = 'file://' + self.source_dir
            script = self.entry_point
        else:
            self.uploaded_code = self._stage_user_code_in_s3()
            code_dir = self.uploaded_code.s3_prefix
            script = self.uploaded_code.script_name

        # Modify hyperparameters in-place to point to the right code directory and script URIs
        self._hyperparameters[DIR_PARAM_NAME] = code_dir
        self._hyperparameters[SCRIPT_PARAM_NAME] = script
        self._hyperparameters[CLOUDWATCH_METRICS_PARAM_NAME] = self.enable_cloudwatch_metrics
        self._hyperparameters[CONTAINER_LOG_LEVEL_PARAM_NAME] = self.container_log_level
        self._hyperparameters[JOB_NAME_PARAM_NAME] = self._current_job_name
        self._hyperparameters[SAGEMAKER_REGION_PARAM_NAME] = self.sagemaker_session.region_name
        super(Framework, self).fit(inputs, wait, logs, self._current_job_name)
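To trace the local-code branch above with concrete values (the paths are made up), this is roughly how the entry point and source directory resolve when no source_dir was given:

```python
import os

# Hypothetical inputs: local mode with no_internet=True and source_dir=None.
entry_point = '/home/user/project/train.py'

source_dir = os.path.dirname(entry_point)    # '/home/user/project'
entry_point = os.path.basename(entry_point)  # 'train.py'

code_dir = 'file://' + source_dir            # 'file:///home/user/project'
script = entry_point                         # 'train.py'

# code_dir and script then land in the DIR_PARAM_NAME and SCRIPT_PARAM_NAME
# hyperparameters, so the container mounts the directory instead of pulling from S3.
```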

    def _stage_user_code_in_s3(self):
        """ Upload the user training script to s3 and return the location.

        Returns: s3 uri

        """
        if self.code_location is None:
            code_bucket = self.sagemaker_session.default_bucket()
            code_s3_prefix = '{}/source'.format(self._current_job_name)
        else:
            code_bucket, key_prefix = parse_s3_url(self.code_location)
            code_s3_prefix = '{}/{}/source'.format(key_prefix, self._current_job_name)

        self.uploaded_code = tar_and_upload_dir(session=self.sagemaker_session.boto_session,
                                                bucket=code_bucket,
                                                s3_key_prefix=code_s3_prefix,
                                                script=self.entry_point,
                                                directory=self.source_dir)

        # Modify hyperparameters in-place to add the URLs to the uploaded code.
        self._hyperparameters[DIR_PARAM_NAME] = self.uploaded_code.s3_prefix
        self._hyperparameters[SCRIPT_PARAM_NAME] = self.uploaded_code.script_name
        self._hyperparameters[CLOUDWATCH_METRICS_PARAM_NAME] = self.enable_cloudwatch_metrics
        self._hyperparameters[CONTAINER_LOG_LEVEL_PARAM_NAME] = self.container_log_level
        self._hyperparameters[JOB_NAME_PARAM_NAME] = self._current_job_name
        self._hyperparameters[SAGEMAKER_REGION_PARAM_NAME] = self.sagemaker_session.boto_session.region_name
        super(Framework, self).fit(inputs, wait, logs, self._current_job_name)
        return tar_and_upload_dir(session=self.sagemaker_session.boto_session,
                                  bucket=code_bucket,
                                  s3_key_prefix=code_s3_prefix,
                                  script=self.entry_point,
                                  directory=self.source_dir)
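For reference, a small sketch of what the staging step produces; the bucket and job name below are placeholders. tar_and_upload_dir returns an UploadedCode value whose s3_prefix and script_name feed the code-directory and script hyperparameters.

```python
# Placeholder values, for illustration only.
code_bucket = 'my-default-bucket'                        # sagemaker_session.default_bucket()
current_job_name = 'tensorflow-2018-01-01-00-00-00-000'

# With code_location=None, the key prefix becomes '<job name>/source':
code_s3_prefix = '{}/source'.format(current_job_name)

# tar_and_upload_dir(...) packs the source directory (or just the entry point)
# into a tarball under that prefix and returns something shaped like:
#   UploadedCode(s3_prefix='s3://my-default-bucket/tensorflow-2018-01-01-00-00-00-000/source/...',
#                script_name='train.py')
```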
    def hyperparameters(self):
        """Return the hyperparameters as a dictionary to use for training.
sagemaker/fw_utils.py
@@ -68,6 +68,27 @@ def create_image_uri(region, framework, instance_type, framework_version, py_ver
        .format(account, region, framework, tag)


def validate_source_dir(script, directory):
    """Validate that the source directory exists and it contains the user script

    Args:
        script (str): Script filename.
        directory (str): Directory containing the source file.

    Raises:
        ValueError: If ``directory`` does not exist, is not a directory, or does not contain ``script``.
    """
    if directory:
        if not os.path.exists(directory):
            raise ValueError('"{}" does not exist.'.format(directory))
        if not os.path.isdir(directory):
            raise ValueError('"{}" is not a directory.'.format(directory))
        if script not in os.listdir(directory):
            raise ValueError('No file named "{}" was found in directory "{}".'.format(script, directory))

    return True

Review comment (on the `if directory:` guard): remove this if statement.

Review comment (on the `script not in os.listdir(directory)` check): This is a very small point - but doing a stat on os.path.join(directory, script) would be better, because it avoids listing the entire directory.

Review comment: I'd be tempted to rewrite this entire thing as:
    if not os.path.isfile(os.path.join(directory, script)):
        raise ValueError('...')

Reply: Good idea. This was just taken from the original tar_and_upload_dir() but this is a great time to improve it 👍

Reply: just realized I didn't do this. Let me do it now.
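Pulling the two suggestions above together, the helper might end up looking roughly like the sketch below; this is not what is in this commit, only the direction the review comments point to.

```python
import os

def validate_source_dir(script, directory):
    """Validate that the source directory exists and contains the user script."""
    # The 'if directory:' guard is dropped, per the first comment above.
    if not os.path.exists(directory):
        raise ValueError('"{}" does not exist.'.format(directory))
    if not os.path.isdir(directory):
        raise ValueError('"{}" is not a directory.'.format(directory))
    # A single stat instead of listing the whole directory, per the second comment.
    if not os.path.isfile(os.path.join(directory, script)):
        raise ValueError('No file named "{}" was found in directory "{}".'.format(script, directory))
    return True
```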


def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory):
    """Pack and upload source files to S3 only if directory is empty or local.

@@ -83,21 +104,13 @@ def tar_and_upload_dir(session, bucket, s3_key_prefix, script, directory):

    Returns:
        sagemaker.fw_utils.UserCode: An object with the S3 bucket and key (S3 prefix) and script name.

    Raises:
        ValueError: If ``directory`` does not exist, is not a directory, or does not contain ``script``.
    """
    if directory:
        if directory.lower().startswith("s3://"):
            return UploadedCode(s3_prefix=directory, script_name=os.path.basename(script))
        if not os.path.exists(directory):
            raise ValueError('"{}" does not exist.'.format(directory))
        if not os.path.isdir(directory):
            raise ValueError('"{}" is not a directory.'.format(directory))
        if script not in os.listdir(directory):
            raise ValueError('No file named "{}" was found in directory "{}".'.format(script, directory))
        script_name = script
        source_files = [os.path.join(directory, name) for name in os.listdir(directory)]
        else:
            script_name = script
            source_files = [os.path.join(directory, name) for name in os.listdir(directory)]
    else:
        # If no directory is specified, the script parameter needs to be a valid relative path.
        os.path.exists(script)
Review comment (on the no_internet configuration property): This property should be local.local_code and it should default to true. Customers can still call out to the internet if 'no_internet' is true.

Reply: much better, I was not a fan of no_internet.
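Assuming the same nested-key lookup this PR already uses in estimator.py (get_config_value('local.no_internet', session.config)), the renamed, default-true setting could be read roughly as below; the key name comes from the comment above and the default handling is only a sketch.

```python
# Sketch only: 'local.local_code' is the name proposed in the review comment.
from sagemaker.utils import get_config_value

def use_local_code(session):
    """Read the proposed local.local_code flag, defaulting to True when unset."""
    value = get_config_value('local.local_code', session.config)
    return True if value is None else value

# The branch shown earlier in Framework.fit() would then become something like:
#     if self.sagemaker_session.local_mode and use_local_code(self.sagemaker_session):
#         code_dir = 'file://' + self.source_dir
```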