Commit 09ad9a7

Make RL training compatible with PyTorch (#1520)
* Make RLEstimator() PyTorch compatible & modify cartpole notebook
* Set use_pytorch to False by default
* Minor refactor; check in first unit test
* Indent correction
1 parent fbdca81 commit 09ad9a7

8 files changed, +60 -22 lines changed


reinforcement_learning/README.md

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,8 @@ These examples demonstrate how to train reinforcement learning models on SageMaker
 
 **IMPORTANT for rllib users:** Some examples may break with latest [rllib](https://docs.ray.io/en/latest/rllib.html) due to breaking API changes. Please refer to [Amazon SageMaker RL Container](https://github.com/aws/sagemaker-rl-container) for the latest public images and modify the configs in entrypoint scripts according to [rllib algorithm config](https://docs.ray.io/en/latest/rllib-algorithms.html).
 
+If you are using PyTorch rather than TensorFlow, please set `debugger_hook_config=False` when calling `RLEstimator()` to avoid TensorBoard conflicts.
+
 - [Contextual Bandit with Live Environment](bandits_statlog_vw_customEnv) illustrates how you can manage your own contextual multi-armed bandit workflow on SageMaker using the built-in [Vowpal Wabbit](https://github.com/VowpalWabbit/vowpal_wabbit) (VW) container to train and deploy contextual bandit models.
 - [Cartpole](rl_cartpole_coach) uses SageMaker RL base [docker image](https://github.com/aws/sagemaker-rl-container) to balance a broom upright.
 - [Cartpole Batch](rl_cartpole_batch_coach) uses batch RL techniques to train Cartpole with offline data.
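The new README note corresponds to the `debugger_hook_config=False` line this commit adds to the cartpole notebook's estimator cell (see the .ipynb diff below). As a minimal standalone sketch of a PyTorch-flavored call, with the image URI, role ARN, bucket, and instance type as illustrative placeholders and keyword names in the SageMaker Python SDK v1 style used by this commit:

from sagemaker.rl import RLEstimator

# Placeholders -- substitute your own image, role, and output bucket.
custom_image_name = "<account>.dkr.ecr.<region>.amazonaws.com/sagemaker-rl-ray-container:ray-0.8.5-torch-cpu-py36"
role = "arn:aws:iam::111122223333:role/SageMakerRole"
s3_output_path = "s3://<bucket>/rl-cartpole/"

estimator = RLEstimator(entry_point="train-rl-cartpole-ray.py",
                        source_dir="src",
                        dependencies=["common/sagemaker_rl"],
                        image_name=custom_image_name,
                        role=role,
                        debugger_hook_config=False,  # avoids TensorBoard conflicts when training with PyTorch
                        train_instance_type="ml.m5.xlarge",
                        train_instance_count=1,
                        output_path=s3_output_path)
estimator.fit()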

reinforcement_learning/common/sagemaker_rl/ray_launcher.py

Lines changed: 8 additions & 3 deletions
@@ -243,10 +243,13 @@ def create_tf_serving_model(self, algorithm=None, env_string=None):
         agent.restore(checkpoint)
         export_tf_serving(agent, MODEL_OUTPUT_DIR)
 
-    def save_checkpoint_and_serving_model(self, algorithm=None, env_string=None):
+    def save_checkpoint_and_serving_model(self, algorithm=None, env_string=None, use_pytorch=False):
         self.save_experiment_config()
         self.copy_checkpoints_to_model_output()
-        self.create_tf_serving_model(algorithm, env_string)
+        if use_pytorch:
+            print("Skipped PyTorch serving.")
+        else:
+            self.create_tf_serving_model(algorithm, env_string)
 
         # To ensure SageMaker local mode works fine
         change_permissions_recursive(INTERMEDIATE_DIR, 0o777)

@@ -335,8 +338,10 @@ def launch(self):
 
         algo = experiment_config["training"]["run"]
         env_string = experiment_config["training"]["config"]["env"]
+        use_pytorch = experiment_config["training"]["config"].get("use_pytorch", False)
         self.save_checkpoint_and_serving_model(algorithm=algo,
-                                               env_string=env_string)
+                                               env_string=env_string,
+                                               use_pytorch=use_pytorch)
 
     @classmethod
     def train_main(cls):
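For context on the plumbing above: the flag rides in the ordinary RLlib `config` dict rather than a new CLI argument, so an entrypoint script opts in with a single key. A minimal sketch of the shape `launch()` reads (the algorithm name and env string are illustrative, borrowed from the cartpole example):

experiment_config = {
    "training": {
        "run": "PPO",                 # illustrative algorithm name
        "config": {
            "env": "CartPole-v0",     # illustrative environment id
            "use_pytorch": True,      # the new flag; treated as False when absent
        },
    },
}

# Mirrors the lookup added in launch() above.
use_pytorch = experiment_config["training"]["config"].get("use_pytorch", False)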

reinforcement_learning/common/sagemaker_rl/tf_serving_utils.py

Lines changed: 2 additions & 2 deletions
@@ -17,8 +17,8 @@ def change_permissions_recursive(path, mode):
     for root, dirs, files in os.walk(path, topdown=False):
         for dir in [os.path.join(root, d) for d in dirs]:
             os.chmod(dir, mode)
-            for file in [os.path.join(root, f) for f in files]:
-                os.chmod(file, mode)
+        for file in [os.path.join(root, f) for f in files]:
+            os.chmod(file, mode)
 
 
 def export_tf_serving(agent, output_dir):
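The commit message calls this an "indent correction", and the fix is behavioral rather than cosmetic: assuming the pre-fix nesting placed the file loop inside the directory loop, any directory with no subdirectories (`dirs == []`) never reached the file loop, so its files kept their old permissions. A self-contained sketch of the corrected function plus a quick leaf-directory check (the temp-dir test is illustrative, not part of the commit):

import os
import tempfile

def change_permissions_recursive(path, mode):
    for root, dirs, files in os.walk(path, topdown=False):
        for dir in [os.path.join(root, d) for d in dirs]:
            os.chmod(dir, mode)
        for file in [os.path.join(root, f) for f in files]:  # sibling of the dir loop after the fix
            os.chmod(file, mode)

with tempfile.TemporaryDirectory() as tmp:
    open(os.path.join(tmp, "checkpoint"), "w").close()
    change_permissions_recursive(tmp, 0o777)  # leaf directory: the old nesting would skip this file
    assert os.stat(os.path.join(tmp, "checkpoint")).st_mode & 0o777 == 0o777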

reinforcement_learning/common/tests/__init__.py

Whitespace-only changes.

reinforcement_learning/common/tests/unit/__init__.py

Whitespace-only changes.
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+import pytest
+from mock import Mock, MagicMock, patch
+
+from sagemaker_rl.ray_launcher import SageMakerRayLauncher
+
+@patch("sagemaker_rl.ray_launcher.SageMakerRayLauncher.__init__", return_value=None)
+@patch("sagemaker_rl.ray_launcher.change_permissions_recursive")
+def test_pytorch_save_checkpoint_and_serving_model(change_permission, launcher_init):
+    launcher = SageMakerRayLauncher()
+    launcher.copy_checkpoints_to_model_output = Mock()
+    launcher.create_tf_serving_model = Mock()
+    launcher.save_experiment_config = Mock()
+
+    launcher.save_checkpoint_and_serving_model(use_pytorch=True)
+    launcher.create_tf_serving_model.assert_not_called()
+    launcher.save_checkpoint_and_serving_model(use_pytorch=False)
+    launcher.create_tf_serving_model.assert_called_once()
+    assert 4 == change_permission.call_count
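Two notes on this test: the final assertion implies `save_checkpoint_and_serving_model` invokes `change_permissions_recursive` twice per call (the launcher diff above shows the `INTERMEDIATE_DIR` call; a second call presumably follows just past the visible hunk), and the method is called twice here, giving a count of 4. Assuming `pytest` and `mock` are installed, it should run from `reinforcement_learning/common` with e.g. `pytest tests/unit`.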

reinforcement_learning/rl_cartpole_ray/rl_cartpole_ray_gymEnv.ipynb

Lines changed: 16 additions & 17 deletions
@@ -9,7 +9,7 @@
     "---\n",
     "## Introduction\n",
     "\n",
-    "In this notebook we'll start from the cart-pole balancing problem, where a pole is attached by an un-actuated joint to a cart, moving along a frictionless track. Instead of applying control theory to solve the problem, this example shows how to solve the problem with reinforcement learning on Amazon SageMaker and Ray RLlib \n",
+    "In this notebook we'll start from the cart-pole balancing problem, where a pole is attached by an un-actuated joint to a cart, moving along a frictionless track. Instead of applying control theory to solve the problem, this example shows how to solve the problem with reinforcement learning on Amazon SageMaker and Ray RLlib. You can choose either TensorFlow or PyTorch as your underlying DL framework.\n",
     "\n",
     "(For a similar example using Coach library, see this [link](../rl_cartpole_coach/rl_cartpole_coach_gymEnv.ipynb). Another Cart-pole example using Coach library and offline data can be found [here](../rl_cartpole_batch_coach/rl_cartpole_batch_coach.ipynb).)\n",
     "\n",
@@ -196,7 +196,8 @@
     "\n",
     "cpu_or_gpu = 'gpu' if instance_type.startswith('ml.p') else 'cpu'\n",
     "aws_region = boto3.Session().region_name\n",
-    "custom_image_name = \"462105765813.dkr.ecr.%s.amazonaws.com/sagemaker-rl-ray-container:ray-0.8.5-tf-%s-py36\" % (aws_region, cpu_or_gpu)\n",
+    "framework = 'tf' # change to 'torch' for PyTorch training\n",
+    "custom_image_name = \"462105765813.dkr.ecr.%s.amazonaws.com/sagemaker-rl-ray-container:ray-0.8.5-%s-%s-py36\" % (aws_region, framework, cpu_or_gpu)\n",
     "custom_image_name"
@@ -206,8 +207,10 @@
    "source": [
     "## Write the Training Code\n",
     "\n",
-    "The training code is written in the file “train-coach.py” which is uploaded in the /src directory. \n",
-    "First import the environment files and the preset files, and then define the main() function. "
+    "The training code is written in the file “train-rl-cartpole-ray.py” which is uploaded in the /src directory. \n",
+    "First import the environment files and the preset files, and then define the main() function. \n",
+    "\n",
+    "**Note**: If PyTorch is used, please update the above training code and set `use_pytorch` to `True` in the config."
    ]
   },
   {
@@ -218,7 +221,7 @@
    },
    "outputs": [],
    "source": [
-    "!pygmentize src/train-{job_name_prefix}.py"
+    "!pygmentize src/train-rl-cartpole-ray.py"
    ]
   },
   {
@@ -249,11 +252,12 @@
     "\n",
     "metric_definitions = RLEstimator.default_metric_definitions(RLToolkit.RAY)\n",
     " \n",
-    "estimator = RLEstimator(entry_point=\"train-%s.py\" % job_name_prefix,\n",
+    "estimator = RLEstimator(entry_point=\"train-rl-cartpole-ray.py\",\n",
     "                        source_dir='src',\n",
     "                        dependencies=[\"common/sagemaker_rl\"],\n",
     "                        image_name=custom_image_name,\n",
     "                        role=role,\n",
+    "                        debugger_hook_config=False,\n",
     "                        train_instance_type=instance_type,\n",
     "                        train_instance_count=1,\n",
     "                        output_path=s3_output_path,\n",
@@ -456,22 +460,17 @@
     "print(\"Evaluation job: %s\" % job_name)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Visualize the output \n",
-    "\n",
-    "Optionally, you can run the steps defined earlier to visualize the output."
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Model deployment\n",
     "\n",
-    "Now let us deploy the RL policy so that we can get the optimal action, given an environment observation."
+    "Now let us deploy the RL policy so that we can get the optimal action, given an environment observation.\n",
+    "\n",
+    "**Note**: Model deployment is supported for TensorFlow only at the current stage. \n",
+    "\n",
+    "STOP HERE IF PYTORCH IS USED."
    ]
   },
   {
@@ -563,4 +562,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
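With the `framework` switch added to the image-selection cell, moving the notebook to PyTorch is a one-line change; the format string then resolves to a `-torch-` image tag (the region below is illustrative):

framework = 'torch'  # instead of 'tf'
# resolves to e.g. "462105765813.dkr.ecr.us-west-2.amazonaws.com/sagemaker-rl-ray-container:ray-0.8.5-torch-cpu-py36"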

reinforcement_learning/rl_cartpole_ray/src/train-rl-cartpole-ray.py

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ def get_experiment_config(self):
                 "training_iteration": 40
             },
             "config": {
+                "use_pytorch": False,
                 "gamma": 0.99,
                 "kl_coeff": 1.0,
                 "num_sgd_iter": 20,
