Skip to content

Commit fe50f65

Browse files
authored
merge from master
2 parents 73b6681 + 57fdf88 commit fe50f65

File tree

220 files changed

+22084
-8312
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

220 files changed

+22084
-8312
lines changed

.github/CODEOWNERS

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,7 @@
77
#
88
# @See https://help.github.com/articles/about-codeowners/
99

10-
/sagemaker-experiments/* @aws/sagemakerexperimentsadmin
10+
/sagemaker-experiments/* @aws/sagemakerexperimentsadmin
11+
12+
# Community contributed
13+
/contrib/ @aws/sagemaker-notebook-sas

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@
33
**/__pycache__
44
**/.aws-sam
55
.DS_Store
6+
7+
**/_build

.readthedocs.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# ReadTheDocs environment customization to allow us to use conda to install
2+
# libraries which have C dependencies for the doc build. See:
3+
# https://docs.readthedocs.io/en/latest/config-file/v2.html
4+
5+
version: 2
6+
7+
conda:
8+
environment: environment.yml
9+
10+
python:
11+
version: 3.6
12+
13+
sphinx:
14+
configuration: conf.py
15+
fail_on_warning: false

Makefile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Minimal makefile for Sphinx documentation
2+
#
3+
4+
# You can set these variables from the command line, and also
5+
# from the environment for the first two.
6+
SPHINXOPTS ?=
7+
SPHINXBUILD ?= sphinx-build
8+
SOURCEDIR = .
9+
BUILDDIR = _build
10+
11+
# Put it first so that "make" without argument is like "make help".
12+
help:
13+
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14+
15+
.PHONY: help Makefile
16+
17+
# Catch-all target: route all unknown targets to Sphinx using the new
18+
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19+
%: Makefile
20+
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ The following provide examples demonstrating different capabilities of Amazon Sa
7777
- [Knapsack Problem](reinforcement_learning/rl_knapsack_coach_custom) demonstrates how to solve the knapsack problem using a custom environment.
7878
- [Mountain Car](reinforcement_learning/rl_mountain_car_coach_gymEnv) Mountain car is a classic RL problem. This notebook explains how to solve this using the OpenAI Gym environment.
7979
- [Distributed Neural Network Compression](reinforcement_learning/rl_network_compression_ray_custom) This notebook explains how to compress ResNets using RL, using a custom environment and the RLLib toolkit.
80-
- [Turtlebot Tracker](reinforcement_learning/rl_objecttracker_robomaker_coach_gazebo) This notebook demonstrates object tracking using AWS Robomaker and RL Coach in the Gazebo environment.
8180
- [Portfolio Management](reinforcement_learning/rl_portfolio_management_coach_customEnv) This notebook uses a custom Gym environment to manage multiple financial investments.
8281
- [Autoscaling](reinforcement_learning/rl_predictive_autoscaling_coach_customEnv) demonstrates how to adjust load depending on demand. This uses RL Coach and a custom environment.
8382
- [Roboschool](reinforcement_learning/rl_roboschool_ray) is an open source physics simulator that is commonly used to train RL policies for robotic systems. This notebook demonstrates training a few agents using it.
@@ -141,7 +140,7 @@ These examples provide you an introduction to how to use Neo to optimizes deep l
141140
- [Distributed TensorFlow](sagemaker_neo_compilation_jobs/tensorflow_distributed_mnist) Adapts from [tensorflow mnist](sagemaker-python-sdk/tensorflow_distributed_mnist) including Neo API and comparison between the baseline
142141
- [Predicting Customer Churn](sagemaker_neo_compilation_jobs/xgboost_customer_churn) Adapts from [xgboost customer churn](introduction_to_applying_machine_learning/xgboost_customer_churn) including Neo API and comparison between the baseline
143142

144-
### Amazon SageMaker Procesing
143+
### Amazon SageMaker Processing
145144

146145
These examples show you how to use SageMaker Processing jobs to run data processing workloads.
147146

_static/js/analytics.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
console.log("Starting analytics...");
2+
var s_code=s.t();if(s_code)document.write(s_code)
Loading

_static/sagemaker_gears.jpg

26.3 KB
Loading

advanced_functionality/autogluon-tabular/AutoGluon_Tabular_SageMaker.ipynb

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,18 @@
4343
},
4444
"outputs": [],
4545
"source": [
46-
"# Imports\n",
4746
"import os\n",
4847
"import boto3\n",
4948
"import sagemaker\n",
5049
"from time import sleep\n",
5150
"from collections import Counter\n",
5251
"import numpy as np\n",
5352
"import pandas as pd\n",
54-
"from sagemaker import get_execution_role, local, Model, utils, fw_utils, s3\n",
53+
"from sagemaker import get_execution_role, local, Model, utils, s3\n",
5554
"from sagemaker.estimator import Estimator\n",
56-
"from sagemaker.predictor import RealTimePredictor, csv_serializer, StringDeserializer\n",
55+
"from sagemaker.predictor import Predictor\n",
56+
"from sagemaker.serializers import CSVSerializer\n",
57+
"from sagemaker.deserializers import StringDeserializer\n",
5758
"from sklearn.metrics import accuracy_score, classification_report\n",
5859
"from IPython.core.display import display, HTML\n",
5960
"from IPython.core.interactiveshell import InteractiveShell\n",
@@ -74,9 +75,10 @@
7475
" \"sts\", region_name=region, endpoint_url=utils.sts_regional_endpoint(region)\n",
7576
" )\n",
7677
"account = client.get_caller_identity()['Account']\n",
77-
"ecr_uri_prefix = utils.get_ecr_image_uri_prefix(account, region)\n",
78-
"registry_id = fw_utils._registry_id(region, 'mxnet', 'py3', account, '1.6.0')\n",
79-
"registry_uri = utils.get_ecr_image_uri_prefix(registry_id, region)"
78+
"\n",
79+
"registry_uri_training = sagemaker.image_uris.retrieve('mxnet', region, version= '1.6.0', py_version='py3', instance_type='ml.m5.2xlarge', image_scope='training')\n",
80+
"registry_uri_inference = sagemaker.image_uris.retrieve('mxnet', region, version= '1.6.0', py_version='py3', instance_type='ml.m5.2xlarge', image_scope='inference')\n",
81+
"ecr_uri_prefix = account +'.'+'.'.join(registry_uri_training.split('/')[0].split('.')[1:])"
8082
]
8183
},
8284
{
@@ -291,7 +293,12 @@
291293
"hyperparameters = {\n",
292294
" 'fit_args': fit_args,\n",
293295
" 'feature_importance': True\n",
294-
"}"
296+
"}\n",
297+
"\n",
298+
"tags = [{\n",
299+
" 'Key' : 'AlgorithmName',\n",
300+
" 'Value' : 'AutoGluon-Tabular'\n",
301+
"}]"
295302
]
296303
},
297304
{
@@ -323,19 +330,38 @@
323330
"\n",
324331
"ecr_image = f'{ecr_uri_prefix}/{training_algorithm_name}:latest'\n",
325332
"\n",
326-
"estimator = Estimator(image_name=ecr_image,\n",
333+
"estimator = Estimator(image_uri=ecr_image,\n",
327334
" role=role,\n",
328-
" train_instance_count=1,\n",
329-
" train_instance_type=instance_type,\n",
335+
" instance_count=1,\n",
336+
" instance_type=instance_type,\n",
330337
" hyperparameters=hyperparameters,\n",
331-
" train_volume_size=100)\n",
338+
" volume_size=100,\n",
339+
" tags=tags)\n",
332340
"\n",
333341
"# Set inputs. Test data is optional, but requires a label column.\n",
334342
"inputs = {'training': train_s3_path, 'testing': test_s3_path}\n",
335343
"\n",
336344
"estimator.fit(inputs)"
337345
]
338346
},
347+
{
348+
"cell_type": "markdown",
349+
"metadata": {},
350+
"source": [
351+
"### Review the performance of the trained model"
352+
]
353+
},
354+
{
355+
"cell_type": "code",
356+
"execution_count": null,
357+
"metadata": {},
358+
"outputs": [],
359+
"source": [
360+
"from utils.ag_utils import launch_viewer\n",
361+
"\n",
362+
"launch_viewer(is_debug=False)"
363+
]
364+
},
339365
{
340366
"cell_type": "markdown",
341367
"metadata": {
@@ -354,10 +380,10 @@
354380
"outputs": [],
355381
"source": [
356382
"# Create predictor object\n",
357-
"class AutoGluonTabularPredictor(RealTimePredictor):\n",
383+
"class AutoGluonTabularPredictor(Predictor):\n",
358384
" def __init__(self, *args, **kwargs):\n",
359-
" super().__init__(*args, content_type='text/csv', \n",
360-
" serializer=csv_serializer, \n",
385+
" super().__init__(*args, \n",
386+
" serializer=CSVSerializer(), \n",
361387
" deserializer=StringDeserializer(), **kwargs)"
362388
]
363389
},
@@ -372,10 +398,10 @@
372398
"ecr_image = f'{ecr_uri_prefix}/{inference_algorithm_name}:latest'\n",
373399
"\n",
374400
"if instance_type == 'local':\n",
375-
" model = estimator.create_model(image=ecr_image, role=role)\n",
401+
" model = estimator.create_model(image_uri=ecr_image, role=role)\n",
376402
"else:\n",
377403
" model_uri = os.path.join(estimator.output_path, estimator._current_job_name, \"output\", \"model.tar.gz\")\n",
378-
" model = Model(model_uri, ecr_image, role=role, sagemaker_session=session, predictor_cls=AutoGluonTabularPredictor)"
404+
" model = Model(ecr_image, model_data=model_uri, role=role, sagemaker_session=session, predictor_cls=AutoGluonTabularPredictor)"
379405
]
380406
},
381407
{

advanced_functionality/autogluon-tabular/container-inference/Dockerfile.inference

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ARG REGISTRY_URI
2-
FROM ${REGISTRY_URI}/mxnet-inference:1.6.0-cpu-py3
2+
FROM ${REGISTRY_URI}
33

44
RUN pip install autogluon
55
RUN pip install PrettyTable

advanced_functionality/autogluon-tabular/container-training/Dockerfile.training

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ARG REGISTRY_URI
2-
FROM ${REGISTRY_URI}/mxnet-training:1.6.0-cpu-py3
2+
FROM ${REGISTRY_URI}
33

44
RUN pip install autogluon
55
RUN pip install PrettyTable
@@ -9,6 +9,9 @@ ENV PATH="/opt/ml/code:${PATH}"
99
COPY container-training/train.py /opt/ml/code/train.py
1010
COPY container-training/inference.py /opt/ml/code/inference.py
1111

12+
# Install seaborn for plot
13+
RUN pip install seaborn
14+
1215
# this environment variable is used by the SageMaker PyTorch container to determine our user code directory.
1316
ENV SAGEMAKER_SUBMIT_DIRECTORY /opt/ml/code
1417

advanced_functionality/autogluon-tabular/container-training/train.py

Lines changed: 106 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
import autogluon as ag
2020
from autogluon import TabularPrediction as task
2121
from autogluon.task.tabular_prediction import TabularDataset
22+
from autogluon.utils.tabular.ml.constants import BINARY, MULTICLASS, REGRESSION, SOFTCLASS
23+
24+
print(f'DEBUG AutoGluon version : {ag.__version__}')
2225

2326

2427
# ------------------------------------------------------------ #
@@ -49,7 +52,59 @@ def format_for_print(df):
4952
table.add_row(row[1:])
5053
return str(table)
5154

55+
def get_roc_auc(y_test_true, y_test_pred, labels, class_labels_internal, model_output_dir):
56+
from sklearn.preprocessing import label_binarize
57+
from sklearn.metrics import roc_curve, auc
58+
59+
from itertools import cycle
60+
61+
y_test_true_binalized = label_binarize(y_test_true, classes=labels)
62+
63+
if len(labels) == 2:
64+
# binary classification
65+
true_label_index = class_labels_internal.index(1)
66+
y_test_pred = y_test_pred[:,true_label_index]
67+
y_test_pred = np.reshape(y_test_pred, (-1, 1))
68+
labels = labels[true_label_index:true_label_index+1]
69+
n_classes = 1
70+
else:
71+
# multiclass classification
72+
n_classes = len(labels)
73+
74+
# Compute ROC curve and ROC area for each class
75+
fpr = dict()
76+
tpr = dict()
77+
roc_auc = dict()
78+
79+
for i in range(n_classes):
80+
fpr[i], tpr[i], _ = roc_curve(y_test_true_binalized[:, i], y_test_pred[:, i])
81+
roc_auc[i] = auc(fpr[i], tpr[i])
82+
83+
# Compute micro-average ROC curve and ROC area
84+
fpr["micro"], tpr["micro"], _ = roc_curve(y_test_true_binalized.ravel(), y_test_pred.ravel())
85+
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
86+
87+
sns.set(font_scale=1)
88+
plt.figure()
89+
lw = 2
90+
colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
91+
92+
for i, color in zip(range(n_classes), colors):
93+
plt.plot(fpr[i], tpr[i], color=color,
94+
lw=lw, label=f'ROC curve for {labels[i]} (area = %0.2f)' % roc_auc[i])
95+
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
96+
plt.xlim([0.0, 1.0])
97+
plt.ylim([0.0, 1.05])
98+
plt.xlabel('False Positive Rate')
99+
plt.ylabel('True Positive Rate')
100+
plt.title('Receiver operating characteristic example')
101+
plt.legend(loc="lower right")
102+
plt.show()
103+
plt.savefig(f'{model_output_dir}/roc_auc_curve.png')
104+
52105
def train(args):
106+
model_output_dir = f'{args.output_dir}/data'
107+
53108
is_distributed = len(args.hosts) > 1
54109
host_rank = args.hosts.index(args.current_host)
55110
dist_ip_addrs = args.hosts
@@ -74,7 +129,23 @@ def train(args):
74129
)
75130

76131
# Results summary
77-
predictor.fit_summary(verbosity=1)
132+
predictor.fit_summary(verbosity=3)
133+
model_summary_fname_src = os.path.join(predictor.output_directory, 'SummaryOfModels.html')
134+
model_summary_fname_tgt = os.path.join(model_output_dir, 'SummaryOfModels.html')
135+
136+
if os.path.exists(model_summary_fname_src):
137+
shutil.copy(model_summary_fname_src, model_summary_fname_tgt)
138+
139+
# ensemble visualization
140+
G = predictor._trainer.model_graph
141+
remove = [node for node,degree in dict(G.degree()).items() if degree < 1]
142+
G.remove_nodes_from(remove)
143+
A = nx.nx_agraph.to_agraph(G)
144+
A.graph_attr.update(rankdir='BT')
145+
A.node_attr.update(fontsize=10)
146+
for node in A.iternodes():
147+
node.attr['shape'] = 'rectagle'
148+
A.draw(os.path.join(model_output_dir, 'ensemble-model.png'), format='png', prog='dot')
78149

79150
# Optional test data
80151
if args.test:
@@ -86,6 +157,7 @@ def train(args):
86157
print('Running model on test data and getting Leaderboard...')
87158
leaderboard = predictor.leaderboard(dataset=test_data, silent=True)
88159
print(format_for_print(leaderboard), end='\n\n')
160+
leaderboard.to_csv(f'{model_output_dir}/leaderboard.csv', index=False)
89161

90162
# Feature importance on test data
91163
# Note: Feature importance must be calculated on held-out (test) data.
@@ -94,7 +166,36 @@ def train(args):
94166
print('Feature importance:')
95167
# Increase rows to print feature importance
96168
pd.set_option('display.max_rows', 500)
97-
print(predictor.feature_importance(test_data))
169+
feature_importance = predictor.feature_importance(test_data)
170+
feature_importance_df = pd.DataFrame(feature_importance, columns=['Importance score']).rename_axis(index='Feature')
171+
print(feature_importance_df)
172+
feature_importance_df.to_csv(f'{model_output_dir}/feature_importance.csv', index=True)
173+
174+
# Classification report and confusion matrix for classification model
175+
if predictor.problem_type in [BINARY, MULTICLASS]:
176+
from sklearn.metrics import classification_report, confusion_matrix
177+
178+
X_test = test_data.drop(args.fit_args['label'], axis=1)
179+
y_test_true = test_data[args.fit_args['label']]
180+
y_test_pred = predictor.predict(X_test)
181+
y_test_pred_prob = predictor.predict_proba(X_test, as_multiclass=True)
182+
183+
report_dict = classification_report(y_test_true, y_test_pred, output_dict=True, labels=predictor.class_labels)
184+
report_dict_df = pd.DataFrame(report_dict).T
185+
report_dict_df.to_csv(f'{model_output_dir}/classification_report.csv', index=True)
186+
187+
cm = confusion_matrix(y_test_true, y_test_pred, labels=predictor.class_labels)
188+
cm_df = pd.DataFrame(cm, predictor.class_labels, predictor.class_labels)
189+
sns.set(font_scale=1)
190+
cmap = 'coolwarm'
191+
sns.heatmap(cm_df, annot=True, fmt='d', cmap=cmap)
192+
plt.title('Confusion Matrix')
193+
plt.ylabel('true label')
194+
plt.xlabel('predicted label')
195+
plt.show()
196+
plt.savefig(f'{model_output_dir}/confusion_matrix.png')
197+
198+
get_roc_auc(y_test_true, y_test_pred_prob, predictor.class_labels, predictor.class_labels_internal, model_output_dir)
98199
else:
99200
warnings.warn('Skipping eval on test data since label column is not included.')
100201

@@ -119,6 +220,7 @@ def parse_args():
119220
parser.add_argument('--current-host', type=str, default=os.environ['SM_CURRENT_HOST'])
120221
parser.add_argument('--num-gpus', type=int, default=os.environ['SM_NUM_GPUS'])
121222
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
223+
parser.add_argument('--output-dir', type=str, default=os.environ['SM_OUTPUT_DIR'])
122224
parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAINING'])
123225
# Arguments to be passed to task.fit()
124226
parser.add_argument('--fit_args', type=lambda s: ast.literal_eval(s),
@@ -164,4 +266,5 @@ def parse_args():
164266
subprocess.call('cp columns.pkl /opt/ml/model/code/'.split())
165267

166268
elapsed_time = round(timer()-start,3)
167-
print(f'Elapsed time: {elapsed_time} seconds. Training Completed!')
269+
print(f'Elapsed time: {elapsed_time} seconds. Training Completed!')
270+

0 commit comments

Comments
 (0)