Skip to content

Commit 8a35cba

Browse files
committed
Merge remote-tracking branch 'origin/master' into feat/jumpstart-model-id-list
2 parents 2bb0f19 + 51706c4 commit 8a35cba

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1595
-186
lines changed

CHANGELOG.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,40 @@
11
# Changelog
22

3+
## v2.86.2 (2022-04-14)
4+
5+
### Bug Fixes and Other Changes
6+
7+
* Use uuid to randomize; otherwise the system timestamp is used
8+
9+
## v2.86.1 (2022-04-13)
10+
11+
### Bug Fixes and Other Changes
12+
13+
* xgboost, sklearn network isolation for jumpstart
14+
15+
### Documentation Changes
16+
17+
* fix minor typo
18+
19+
## v2.86.0 (2022-04-12)
20+
21+
### Features
22+
23+
* Adds Spark Processing Notebook to Notebook Tests
24+
25+
## v2.85.0 (2022-04-11)
26+
27+
### Features
28+
29+
* update lambda code on pipeline create/update/upsert for Lamb…
30+
* jumpstart model url
31+
* add serverless inference image_uri retrieve support
32+
33+
### Bug Fixes and Other Changes
34+
35+
* Add back the Fix for Pipeline variables related customer issues
36+
* Support file URIs in ProcessingStep's code parameter
37+
338
## v2.84.0 (2022-04-07)
439

540
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.84.1.dev0
1+
2.86.3.dev0

doc/doc_utils/jumpstart_doc_utils.py

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,73 @@
1414
from urllib import request
1515
import json
1616
from packaging.version import Version
17+
from enum import Enum
18+
19+
20+
class Tasks(str, Enum):
    """Short task tokens found in the infix of a JumpStart model ID.

    The enum mixes in ``str``, so every member compares equal to (and hashes
    like) its plain string value. The trailing comments name the problem type
    each token is mapped to in ``TASK_MAP``.
    """

    IC = "ic"  # image classification
    OD = "od"  # object detection
    OD1 = "od1"  # object detection
    SEMSEG = "semseg"  # semantic segmentation
    IS = "is"  # instance segmentation
    TC = "tc"  # text classification
    SPC = "spc"  # sentence pair classification
    EQA = "eqa"  # question answering
    TEXT_GENERATION = "textgeneration"
    IC_EMBEDDING = "icembedding"  # image embedding
    TC_EMBEDDING = "tcembedding"  # text embedding
    NER = "ner"  # named entity recognition
    SUMMARIZATION = "summarization"
    TRANSLATION = "translation"
    TABULAR_REGRESSION = "regression"
    TABULAR_CLASSIFICATION = "classification"
39+
40+
41+
class ProblemTypes(str, Enum):
    """Human-readable problem types for JumpStart models.

    Each value is the display label for a problem type. The enum mixes in
    ``str``, so members compare equal to their label.
    """

    IMAGE_CLASSIFICATION = "Image Classification"
    IMAGE_EMBEDDING = "Image Embedding"
    OBJECT_DETECTION = "Object Detection"
    SEMANTIC_SEGMENTATION = "Semantic Segmentation"
    INSTANCE_SEGMENTATION = "Instance Segmentation"
    TEXT_CLASSIFICATION = "Text Classification"
    TEXT_EMBEDDING = "Text Embedding"
    QUESTION_ANSWERING = "Question Answering"
    SENTENCE_PAIR_CLASSIFICATION = "Sentence Pair Classification"
    TEXT_GENERATION = "Text Generation"
    TEXT_SUMMARIZATION = "Text Summarization"
    MACHINE_TRANSLATION = "Machine Translation"
    NAMED_ENTITY_RECOGNITION = "Named Entity Recognition"
    TABULAR_REGRESSION = "Regression"
    TABULAR_CLASSIFICATION = "Classification"

    def __format__(self, format_spec):
        """Format the member as its label rather than as ``ProblemTypes.NAME``.

        Members are interpolated with ``str.format`` when the model table is
        generated. Python 3.11 changed ``format()`` on mixed-in enums to follow
        ``Enum.__str__``, which would emit the member name; delegating to the
        value keeps the rendered label identical on every Python version.
        ``str()`` and ``repr()`` are intentionally left untouched.
        """
        return format(self.value, format_spec)
59+
1760

1861
# Region whose JumpStart cache bucket is read when generating the docs.
JUMPSTART_REGION = "eu-west-2"
# File name of the models manifest.
SDK_MANIFEST_FILE = "models_manifest.json"
# The region appears twice in the URL: once in the bucket name, once in the S3 host.
JUMPSTART_BUCKET_BASE_URL = "https://jumpstart-cache-prod-{}.s3.{}.amazonaws.com".format(
    JUMPSTART_REGION,
    JUMPSTART_REGION,
)

# Lookup from the task token in a model ID to the problem type shown in the docs.
# Note that both object-detection tokens resolve to the same problem type.
TASK_MAP = {
    # Image tasks
    Tasks.IC: ProblemTypes.IMAGE_CLASSIFICATION,
    Tasks.IC_EMBEDDING: ProblemTypes.IMAGE_EMBEDDING,
    Tasks.OD: ProblemTypes.OBJECT_DETECTION,
    Tasks.OD1: ProblemTypes.OBJECT_DETECTION,
    Tasks.SEMSEG: ProblemTypes.SEMANTIC_SEGMENTATION,
    Tasks.IS: ProblemTypes.INSTANCE_SEGMENTATION,
    # Text tasks
    Tasks.TC: ProblemTypes.TEXT_CLASSIFICATION,
    Tasks.TC_EMBEDDING: ProblemTypes.TEXT_EMBEDDING,
    Tasks.EQA: ProblemTypes.QUESTION_ANSWERING,
    Tasks.SPC: ProblemTypes.SENTENCE_PAIR_CLASSIFICATION,
    Tasks.TEXT_GENERATION: ProblemTypes.TEXT_GENERATION,
    Tasks.SUMMARIZATION: ProblemTypes.TEXT_SUMMARIZATION,
    Tasks.TRANSLATION: ProblemTypes.MACHINE_TRANSLATION,
    Tasks.NER: ProblemTypes.NAMED_ENTITY_RECOGNITION,
    # Tabular tasks
    Tasks.TABULAR_REGRESSION: ProblemTypes.TABULAR_REGRESSION,
    Tasks.TABULAR_CLASSIFICATION: ProblemTypes.TABULAR_CLASSIFICATION,
}
2384

2485

2586
def get_jumpstart_sdk_manifest():
@@ -36,6 +97,11 @@ def get_jumpstart_sdk_spec(key):
3697
return json.loads(model_spec)
3798

3899

100+
def get_model_task(id):
    """Return the problem type encoded in a JumpStart model ID.

    The task token is the second hyphen-separated field of the model ID; it is
    looked up in ``TASK_MAP``.

    Args:
        id (str): JumpStart model ID. The name shadows the ``id`` builtin but
            is kept so that existing keyword callers continue to work.

    Returns:
        The ``TASK_MAP`` entry for the task token, or the string ``"Source"``
        when the ID has no second field or the token is not in ``TASK_MAP``.
    """
    fields = id.split("-")
    # An ID without a hyphen carries no task token; use the same fallback as
    # an unknown token instead of raising IndexError.
    if len(fields) < 2:
        return "Source"
    return TASK_MAP.get(fields[1], "Source")
103+
104+
39105
def create_jumpstart_model_table():
40106
sdk_manifest = get_jumpstart_sdk_manifest()
41107
sdk_manifest_top_versions_for_models = {}
@@ -69,26 +135,29 @@ def create_jumpstart_model_table():
69135
)
70136
file_content.append(
71137
"""
72-
Each model ID is linked to an external page that describes the model.\n
138+
Click on the Problem Type to navigate to the source of the model.\n
73139
"""
74140
)
75141
file_content.append("\n")
76142
file_content.append(".. list-table:: Available Models\n")
77-
file_content.append(" :widths: 50 20 20 20\n")
143+
file_content.append(" :widths: 50 20 20 20 30\n")
78144
file_content.append(" :header-rows: 1\n")
79145
file_content.append(" :class: datatable\n")
80146
file_content.append("\n")
81147
file_content.append(" * - Model ID\n")
82148
file_content.append(" - Fine Tunable?\n")
83149
file_content.append(" - Latest Version\n")
84150
file_content.append(" - Min SDK Version\n")
151+
file_content.append(" - Problem Type/Source\n")
85152

86153
for model in sdk_manifest_top_versions_for_models.values():
87154
model_spec = get_jumpstart_sdk_spec(model["spec_key"])
88-
file_content.append(" * - `{} <{}>`_\n".format(model_spec["model_id"], model_spec["url"]))
155+
model_task = get_model_task(model_spec["model_id"])
156+
file_content.append(" * - {}\n".format(model_spec["model_id"]))
89157
file_content.append(" - {}\n".format(model_spec["training_supported"]))
90158
file_content.append(" - {}\n".format(model["version"]))
91159
file_content.append(" - {}\n".format(model["min_version"]))
160+
file_content.append(" - `{} <{}>`__\n".format(model_task, model_spec["url"]))
92161

93162
f = open("doc_utils/jumpstart.rst", "w")
94163
f.writelines(file_content)

doc/frameworks/tensorflow/deploying_tensorflow_serving.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,8 +272,8 @@ More information on how to create ``export_outputs`` can be found in `specifying
272272
refer to TensorFlow's `Save and Restore <https://www.tensorflow.org/guide/saved_model>`_ documentation for other ways to control the
273273
inference-time behavior of your SavedModels.
274274

275-
Providing Python scripts for pre/pos-processing
276-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
275+
Providing Python scripts for pre/post-processing
276+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
277277

278278
You can add your customized Python code to process your input and output data.
279279
This customized Python code must be named ``inference.py`` and specified through the ``entry_point`` parameter:

doc/overview.rst

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -573,15 +573,49 @@ Here is an example:
573573
# When you are done using your endpoint
574574
model.sagemaker_session.delete_endpoint('my-endpoint')
575575
576-
********************************************
577-
Use Prebuilt Models with SageMaker JumpStart
578-
********************************************
576+
*********************************************************
577+
Use SageMaker JumpStart Algorithms with Pretrained Models
578+
*********************************************************
579+
580+
JumpStart for the SageMaker Python SDK uses model IDs and model versions to access the necessary
581+
utilities. The following table provides the core material plus some extra information that can be useful
582+
in selecting the correct model ID and corresponding parameters.
579583

580584
.. toctree::
581585
:maxdepth: 2
582586

583587
doc_utils/jumpstart
584588

589+
Example notebooks
590+
=================
591+
592+
JumpStart supports 15 different machine learning problem types. Below is a list of all the supported
593+
problem types with a link to a Jupyter notebook that provides example usage.
594+
595+
Vision
596+
- `Image Classification <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_image_classification/Amazon_JumpStart_Image_Classification.ipynb>`__
597+
- `Object Detection <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_object_detection/Amazon_JumpStart_Object_Detection.ipynb>`__
598+
- `Semantic Segmentation <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_semantic_segmentation/Amazon_JumpStart_Semantic_Segmentation.ipynb>`__
599+
- `Instance Segmentation <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_instance_segmentation/Amazon_JumpStart_Instance_Segmentation.ipynb>`__
600+
- `Image Embedding <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_image_embedding/Amazon_JumpStart_Image_Embedding.ipynb>`__
601+
602+
Text
603+
- `Text Classification <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_text_classification/Amazon_JumpStart_Text_Classification.ipynb>`__
604+
- `Sentence Pair Classification <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_sentence_pair_classification/Amazon_JumpStart_Sentence_Pair_Classification.ipynb>`__
605+
- `Question Answering <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_question_answering/Amazon_JumpStart_Question_Answering.ipynb>`__
606+
- `Named Entity Recognition <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_named_entity_recognition/Amazon_JumpStart_Named_Entity_Recognition.ipynb>`__
607+
- `Text Summarization <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_text_summarization/Amazon_JumpStart_Text_Summarization.ipynb>`__
608+
- `Text Generation <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_text_generation/Amazon_JumpStart_Text_Generation.ipynb>`__
609+
- `Machine Translation <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_machine_translation/Amazon_JumpStart_Machine_Translation.ipynb>`__
610+
- `Text Embedding <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_text_embedding/Amazon_JumpStart_Text_Embedding.ipynb>`__
611+
612+
Tabular
613+
- `Tabular Classification (LightGBM & CatBoost) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_tabular_classification/Amazon_JumpStart_Tabular_Classification_LightGBM_CatBoost.ipynb>`__
614+
- `Tabular Classification (XGBoost & Linear Learner) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_tabular_classification/Amazon_JumpStart_Tabular_Classification_XGBoost_LinearLearner.ipynb>`__
615+
- `Tabular Regression (LightGBM & CatBoost) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_tabular_regression/Amazon_JumpStart_Tabular_Regression_LightGBM_CatBoost.ipynb>`__
616+
- `Tabular Regression (XGBoost & Linear Learner) <https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart_tabular_regression/Amazon_JumpStart_Tabular_Regression_XGBoost_LinearLearner.ipynb>`__
617+
618+
585619
`Amazon SageMaker JumpStart <https://aws.amazon.com/sagemaker/getting-started/>`__ is a
586620
SageMaker feature that helps users bring machine learning (ML)
587621
applications to market using prebuilt solutions for common use cases,

src/sagemaker/estimator.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
get_config_value,
7575
name_from_base,
7676
)
77-
from sagemaker.workflow.entities import PipelineVariable
77+
from sagemaker.workflow import is_pipeline_variable
7878

7979
logger = logging.getLogger(__name__)
8080

@@ -457,7 +457,7 @@ def __init__(
457457
self._hyperparameters = hyperparameters.copy() if hyperparameters else {}
458458
self.code_location = code_location
459459
self.entry_point = entry_point
460-
self.dependencies = dependencies
460+
self.dependencies = dependencies or []
461461
self.uploaded_code = None
462462
self.tags = add_jumpstart_tags(
463463
tags=tags, training_model_uri=self.model_uri, training_script_uri=self.source_dir
@@ -600,7 +600,7 @@ def _json_encode_hyperparameters(hyperparameters: Dict[str, Any]) -> Dict[str, A
600600
current_hyperparameters = hyperparameters
601601
if current_hyperparameters is not None:
602602
hyperparameters = {
603-
str(k): (v.to_string() if isinstance(v, PipelineVariable) else json.dumps(v))
603+
str(k): (v.to_string() if is_pipeline_variable(v) else json.dumps(v))
604604
for (k, v) in current_hyperparameters.items()
605605
}
606606
return hyperparameters
@@ -1811,7 +1811,7 @@ def _get_train_args(cls, estimator, inputs, experiment_config):
18111811
current_hyperparameters = estimator.hyperparameters()
18121812
if current_hyperparameters is not None:
18131813
hyperparameters = {
1814-
str(k): (v.to_string() if isinstance(v, PipelineVariable) else str(v))
1814+
str(k): (v.to_string() if is_pipeline_variable(v) else str(v))
18151815
for (k, v) in current_hyperparameters.items()
18161816
}
18171817

@@ -1879,7 +1879,9 @@ def _add_spot_checkpoint_args(cls, local_mode, estimator, train_args):
18791879
if estimator.use_spot_instances:
18801880
if local_mode:
18811881
raise ValueError("Spot training is not supported in local mode.")
1882-
train_args["use_spot_instances"] = True
1882+
# estimator.use_spot_instances may be a Pipeline ParameterBoolean object
1883+
# which is parsed during the Pipeline execution runtime
1884+
train_args["use_spot_instances"] = estimator.use_spot_instances
18831885

18841886
if estimator.checkpoint_s3_uri:
18851887
if local_mode:

src/sagemaker/image_uri_config/huggingface-training-compiler.json

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
"training": {
33
"processors": ["gpu"],
44
"version_aliases": {
5-
"4.11": "4.11.0"
5+
"4.11": "4.11.0",
6+
"4.17": "4.17.0"
67
},
78
"versions": {
89
"4.11.0": {
@@ -32,6 +33,68 @@
3233
"repository": "huggingface-tensorflow-trcomp-training",
3334
"container_version": {"gpu":"cu112-ubuntu18.04"}
3435
}
36+
},
37+
"4.17.0": {
38+
"version_aliases": {
39+
"pytorch1.10": "pytorch1.10.2",
40+
"tensorflow2.6": "tensorflow2.6.3"
41+
},
42+
"pytorch1.10.2": {
43+
"py_versions": ["py38"],
44+
"registries": {
45+
"af-south-1": "626614931356",
46+
"ap-east-1": "871362719292",
47+
"ap-northeast-1": "763104351884",
48+
"ap-northeast-2": "763104351884",
49+
"ap-northeast-3": "364406365360",
50+
"ap-south-1": "763104351884",
51+
"ap-southeast-1": "763104351884",
52+
"ap-southeast-2": "763104351884",
53+
"ca-central-1": "763104351884",
54+
"eu-central-1": "763104351884",
55+
"eu-north-1": "763104351884",
56+
"eu-south-1": "692866216735",
57+
"eu-west-1": "763104351884",
58+
"eu-west-2": "763104351884",
59+
"eu-west-3": "763104351884",
60+
"me-south-1": "217643126080",
61+
"sa-east-1": "763104351884",
62+
"us-east-1": "763104351884",
63+
"us-east-2": "763104351884",
64+
"us-west-1": "763104351884",
65+
"us-west-2": "763104351884"
66+
},
67+
"repository": "huggingface-pytorch-trcomp-training",
68+
"container_version": {"gpu":"cu113-ubuntu20.04"}
69+
},
70+
"tensorflow2.6.3": {
71+
"py_versions": ["py38"],
72+
"registries": {
73+
"af-south-1": "626614931356",
74+
"ap-east-1": "871362719292",
75+
"ap-northeast-1": "763104351884",
76+
"ap-northeast-2": "763104351884",
77+
"ap-northeast-3": "364406365360",
78+
"ap-south-1": "763104351884",
79+
"ap-southeast-1": "763104351884",
80+
"ap-southeast-2": "763104351884",
81+
"ca-central-1": "763104351884",
82+
"eu-central-1": "763104351884",
83+
"eu-north-1": "763104351884",
84+
"eu-south-1": "692866216735",
85+
"eu-west-1": "763104351884",
86+
"eu-west-2": "763104351884",
87+
"eu-west-3": "763104351884",
88+
"me-south-1": "217643126080",
89+
"sa-east-1": "763104351884",
90+
"us-east-1": "763104351884",
91+
"us-east-2": "763104351884",
92+
"us-west-1": "763104351884",
93+
"us-west-2": "763104351884"
94+
},
95+
"repository": "huggingface-tensorflow-trcomp-training",
96+
"container_version": {"gpu":"cu112-ubuntu20.04"}
97+
}
3598
}
3699
}
37100
}

src/sagemaker/lambda_helper.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,18 @@ def update(self):
161161
error = e.response["Error"]
162162
raise ValueError(error)
163163

164+
def upsert(self):
    """Create the Lambda function, or update it when it already exists.

    ``create`` is attempted first. If it raises a ``ValueError`` whose text
    mentions ``ResourceConflictException``, ``update`` is called instead.

    Returns:
        The response returned by whichever of ``create`` or ``update`` ran
        successfully.

    Raises:
        ValueError: Re-raised unchanged from ``create`` when its message does
            not mention ``ResourceConflictException``.
    """
    try:
        response = self.create()
    except ValueError as error:
        # Any failure other than a resource conflict is not ours to handle.
        if "ResourceConflictException" not in str(error):
            raise
        response = self.update()
    return response
175+
164176
def invoke(self):
165177
"""Method to invoke a lambda function.
166178

src/sagemaker/model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from sagemaker.utils import unique_name_from_base
3838
from sagemaker.async_inference import AsyncInferenceConfig
3939
from sagemaker.predictor_async import AsyncPredictor
40+
from sagemaker.workflow import is_pipeline_variable
4041

4142
LOGGER = logging.getLogger("sagemaker")
4243

@@ -449,7 +450,7 @@ def _upload_code(self, key_prefix: str, repack: bool = False) -> None:
449450
)
450451

451452
if repack and self.model_data is not None and self.entry_point is not None:
452-
if isinstance(self.model_data, sagemaker.workflow.properties.Properties):
453+
if is_pipeline_variable(self.model_data):
453454
# model is not yet there, defer repacking to later during pipeline execution
454455
return
455456

@@ -477,7 +478,7 @@ def _script_mode_env_vars(self):
477478
dir_name = None
478479
if self.uploaded_code:
479480
script_name = self.uploaded_code.script_name
480-
if self.enable_network_isolation():
481+
if self.repacked_model_data or self.enable_network_isolation():
481482
dir_name = "/opt/ml/model/code"
482483
else:
483484
dir_name = self.uploaded_code.s3_prefix

0 commit comments

Comments
 (0)