Skip to content

Commit 2507d19

Browse files
authored
Merge branch 'master' into callback-param-bug-fix
2 parents 6deec71 + 9e7b4b5 commit 2507d19

File tree

3 files changed

+30
-8
lines changed

3 files changed

+30
-8
lines changed

src/sagemaker/inputs.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ def __init__(
7070
a local directory.
7171
* 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via
7272
a Unix-named pipe.
73+
* 'FastFile' - Amazon SageMaker streams data from S3 on demand instead of
74+
downloading the entire dataset before training begins.
7375
7476
attribute_names (list[str]): A list of one or more attribute names to use that are
7577
found in a specified AugmentedManifestFile.

src/sagemaker/session.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,8 @@ def train( # noqa: C901
467467
a directory in the Docker container.
468468
* 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a
469469
Unix-named pipe.
470+
* 'FastFile' - Amazon SageMaker streams data from S3 on demand instead of
471+
downloading the entire dataset before training begins.
470472
input_config (list): A list of Channel objects. Each channel is a named input source.
471473
Please refer to the format details described:
472474
https://botocore.readthedocs.io/en/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job
@@ -609,6 +611,8 @@ def _get_train_request( # noqa: C901
609611
a directory in the Docker container.
610612
* 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a
611613
Unix-named pipe.
614+
* 'FastFile' - Amazon SageMaker streams data from S3 on demand instead of
615+
downloading the entire dataset before training begins.
612616
input_config (list): A list of Channel objects. Each channel is a named input source.
613617
Please refer to the format details described:
614618
https://botocore.readthedocs.io/en/latest/reference/services/sagemaker.html#SageMaker.Client.create_training_job
@@ -1897,6 +1901,8 @@ def tune( # noqa: C901
18971901
a directory in the Docker container.
18981902
* 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a
18991903
Unix-named pipe.
1904+
* 'FastFile' - Amazon SageMaker streams data from S3 on demand instead of
1905+
downloading the entire dataset before training begins.
19001906
metric_definitions (list[dict]): A list of dictionaries that defines the metric(s)
19011907
used to evaluate the training jobs. Each dictionary contains two keys: 'Name' for
19021908
the name of the metric, and 'Regex' for the regular expression used to extract the
@@ -2180,6 +2186,8 @@ def _map_training_config(
21802186
a directory in the Docker container.
21812187
* 'Pipe' - Amazon SageMaker streams data directly from S3 to the container via a
21822188
Unix-named pipe.
2189+
* 'FastFile' - Amazon SageMaker streams data from S3 on demand instead of
2190+
downloading the entire dataset before training begins.
21832191
role (str): An AWS IAM role (either name or full ARN). The Amazon SageMaker training
21842192
jobs and APIs that create Amazon SageMaker endpoints use this role to access
21852193
training data and model artifacts. You must grant sufficient permissions to

src/sagemaker/workflow/steps.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,12 @@
1616
import abc
1717

1818
from enum import Enum
19-
from typing import Dict, List
19+
from typing import Dict, List, Union
2020

2121
import attr
2222

2323
from sagemaker.estimator import EstimatorBase, _TrainingJob
24-
from sagemaker.inputs import (
25-
CreateModelInput,
26-
TrainingInput,
27-
TransformInput,
28-
)
24+
from sagemaker.inputs import CreateModelInput, TrainingInput, TransformInput, FileSystemInput
2925
from sagemaker.model import Model
3026
from sagemaker.processing import (
3127
ProcessingInput,
@@ -145,7 +141,7 @@ def __init__(
145141
self,
146142
name: str,
147143
estimator: EstimatorBase,
148-
inputs: TrainingInput = None,
144+
inputs: Union[TrainingInput, dict, str, FileSystemInput] = None,
149145
cache_config: CacheConfig = None,
150146
depends_on: List[str] = None,
151147
):
@@ -157,7 +153,23 @@ def __init__(
157153
Args:
158154
name (str): The name of the training step.
159155
estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
160-
inputs (TrainingInput): A `sagemaker.inputs.TrainingInput` instance. Defaults to `None`.
156+
inputs (str or dict or sagemaker.inputs.TrainingInput
157+
or sagemaker.inputs.FileSystemInput): Information
158+
about the training data. This can be one of four types:
159+
160+
* (str) the S3 location where training data is saved, or a file:// path in
161+
local mode.
162+
* (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) If using multiple
163+
channels for training data, you can specify a dict mapping channel names to
164+
strings or :class:`~sagemaker.inputs.TrainingInput` objects.
165+
* (sagemaker.inputs.TrainingInput) - channel configuration for S3 data sources
166+
that can provide additional information as well as the path to the training
167+
dataset.
168+
See :class:`sagemaker.inputs.TrainingInput` for full details.
169+
* (sagemaker.inputs.FileSystemInput) - channel configuration for
170+
a file system data source that can provide additional information as well as
171+
the path to the training dataset.
172+
161173
cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance.
162174
depends_on (List[str]): A list of step names this `sagemaker.workflow.steps.TrainingStep`
163175
depends on

0 commit comments

Comments
 (0)