
change output data dir for chainer #282


Merged (1 commit) on Jun 12, 2018
@@ -121,7 +121,7 @@
"* `SM_MODEL_DIR`: A string representing the path to the directory to write model artifacts to.\n",
" These artifacts are uploaded to S3 for model hosting.\n",
"* `SM_NUM_GPUS`: An integer representing the number of GPUs available to the host.\n",
"* `SM_OUTPUT_DATA_DIR`: A string representing the filesystem path to write output artifacts to. Output artifacts may\n",
"* `SM_OUTPUT_DIR`: A string representing the filesystem path to write output artifacts to. Output artifacts may\n",
" include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed\n",
" and uploaded to S3 to the same S3 prefix as the model artifacts.\n",
"\n",
@@ -291,10 +291,10 @@
"from IPython.display import Image\n",
"from IPython.display import display\n",
"\n",
"accuracy_graph = Image(filename=\"output/single_machine_cifar/algo-1/accuracy.png\",\n",
"accuracy_graph = Image(filename=\"output/single_machine_cifar/accuracy.png\",\n",
" width=800,\n",
" height=800)\n",
"loss_graph = Image(filename=\"output/single_machine_cifar/algo-1/loss.png\",\n",
"loss_graph = Image(filename=\"output/single_machine_cifar/loss.png\",\n",
" width=800,\n",
" height=800)\n",
"\n",
@@ -449,7 +449,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.6.5"
}
},
"nbformat": 4,

@@ -122,7 +122,7 @@
"* `SM_MODEL_DIR`: A string representing the path to the directory to write model artifacts to.\n",
" These artifacts are uploaded to S3 for model hosting.\n",
"* `SM_NUM_GPUS`: An integer representing the number of GPUs available to the host.\n",
"* `SM_OUTPUT_DATA_DIR`: A string representing the filesystem path to write output artifacts to. Output artifacts may\n",
"* `SM_OUTPUT_DIR`: A string representing the filesystem path to write output artifacts to. Output artifacts may\n",
" include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed\n",
" and uploaded to S3 to the same S3 prefix as the model artifacts.\n",
"\n",
@@ -298,10 +298,10 @@
"from IPython.display import Image\n",
"from IPython.display import display\n",
"\n",
"accuracy_graph = Image(filename=\"output/distributed_cifar/algo-1/accuracy.png\",\n",
"accuracy_graph = Image(filename=\"output/distributed_cifar/accuracy.png\",\n",
" width=800,\n",
" height=800)\n",
"loss_graph = Image(filename=\"output/distributed_cifar/algo-1/loss.png\",\n",
"loss_graph = Image(filename=\"output/distributed_cifar/loss.png\",\n",
" width=800,\n",
" height=800)\n",
"\n",
@@ -456,7 +456,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.6.5"
},
"notice": "Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws. amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License."
},

@@ -43,7 +43,7 @@
parser.add_argument('--communicator', type=str, default='pure_nccl' if num_gpus > 0 else 'naive')

# Data, model, and output directories. These are required.
parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
parser.add_argument('--output-dir', type=str, default=os.environ['SM_OUTPUT_DIR'])
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])
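
For orientation (not part of the diff): inside the training container these environment variables resolve to fixed paths under /opt/ml, so the argparse defaults above pick up concrete directories. The `algo-1` component dropped from the notebook image paths elsewhere in this diff suggests the old `SM_OUTPUT_DATA_DIR` pointed at a per-host subdirectory, whereas the new code derives the data directory itself. A minimal sketch, assuming the standard SageMaker container layout:

```python
# Illustrative sketch, assuming the standard SageMaker container layout:
#   SM_OUTPUT_DIR     -> /opt/ml/output
#   SM_MODEL_DIR      -> /opt/ml/model
#   SM_CHANNEL_TRAIN  -> /opt/ml/input/data/train
#   SM_CHANNEL_TEST   -> /opt/ml/input/data/test
import os

output_dir = os.environ.get('SM_OUTPUT_DIR', '/opt/ml/output')

# The scripts in this PR derive the data output directory themselves; everything
# written under it is compressed into output.tar.gz at the end of the training job.
output_data_dir = os.path.join(output_dir, 'data')  # -> /opt/ml/output/data
```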
@@ -91,7 +91,8 @@

# Set up a trainer
updater = training.StandardUpdater(train_iter, optimizer, device=device)
trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.output_data_dir)
output_data_dir = os.path.join(args.output_dir, 'data')
trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=output_data_dir)

# Evaluate the model with the test dataset for each epoch


@@ -37,7 +37,7 @@
parser.add_argument('--learning-rate', type=float, default=0.05)

# Data, model, and output directories. These are required.
parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
parser.add_argument('--output-dir', type=str, default=os.environ['SM_OUTPUT_DIR'])
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])
@@ -82,7 +82,9 @@
updater = training.updater.StandardUpdater(train_iter, optimizer, device=device)

stop_trigger = (args.epochs, 'epoch')
trainer = training.Trainer(updater, stop_trigger, out=args.output_data_dir)

output_data_dir = os.path.join(args.output_dir, 'data')
trainer = training.Trainer(updater, stop_trigger, out=output_data_dir)
# Evaluate the model with the test dataset for each epoch
trainer.extend(extensions.Evaluator(test_iter, model, device=device))
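
The `Evaluator` above is only one of the extensions the full script registers; the accuracy and loss plots the notebooks display come from report extensions that write into the trainer's `out` directory (`output_data_dir`). A hedged sketch using standard Chainer APIs; the exact extension list in the full script lies outside this hunk:

```python
# Sketch: report/plot extensions that write loss.png and accuracy.png into the
# trainer's `out` directory (output_data_dir above). Standard Chainer APIs; the
# full script's extension list is not shown in this hunk.
from chainer.training import extensions

trainer.extend(extensions.LogReport())
trainer.extend(extensions.PlotReport(
    ['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
trainer.extend(extensions.PlotReport(
    ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))
trainer.extend(extensions.PrintReport(
    ['epoch', 'main/loss', 'validation/main/loss',
     'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
```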


@@ -137,7 +137,7 @@
"* `SM_MODEL_DIR`: A string representing the path to the directory to write model artifacts to.\n",
" These artifacts are uploaded to S3 for model hosting.\n",
"* `SM_NUM_GPUS`: An integer representing the number of GPUs available to the host.\n",
"* `SM_OUTPUT_DATA_DIR`: A string representing the filesystem path to write output artifacts to. Output artifacts may\n",
"* `SM_OUTPUT_DIR`: A string representing the filesystem path to write output artifacts to. Output artifacts may\n",
" include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed\n",
" and uploaded to S3 to the same S3 prefix as the model artifacts.\n",
"\n",
@@ -501,7 +501,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.6.5"
}
},
"nbformat": 4,

@@ -49,7 +49,7 @@ def __call__(self, x):
parser.add_argument('--batch-size', type=int, default=64)

# Data, model, and output directories. These are required.
parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
parser.add_argument('--output-dir', type=str, default=os.environ['SM_OUTPUT_DIR'])
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])
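
These argument defaults are only populated when the script runs as a SageMaker entry point. Below is a rough sketch of how the accompanying notebook launches it with the SageMaker Python SDK (not shown in this diff): the entry-point name, IAM role, instance settings, hyperparameters, and S3 URIs are placeholders, and the constructor argument names follow the 2018-era SDK, so they may differ in newer releases.

```python
# Sketch (placeholders throughout): launching the training script as a SageMaker job.
from sagemaker.chainer import Chainer

estimator = Chainer(entry_point='chainer_mnist.py',      # placeholder script name
                    role='SageMakerRole',                 # placeholder IAM role
                    train_instance_count=1,
                    train_instance_type='ml.p2.xlarge',
                    hyperparameters={'epochs': 10, 'batch-size': 64})

# The channel names here become SM_CHANNEL_TRAIN / SM_CHANNEL_TEST inside the container.
estimator.fit({'train': 's3://my-bucket/chainer/train',   # placeholder S3 URIs
               'test': 's3://my-bucket/chainer/test'})
```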
@@ -96,7 +96,7 @@ def __call__(self, x):
updater = training.StandardUpdater(train_iter, optimizer, device=device)

# Write output files to output_data_dir. These are zipped and uploaded to S3 output path as output.tar.gz.
trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.output_data_dir)
trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.output_dir)

# Evaluate the model with the test dataset for each epoch
trainer.extend(extensions.Evaluator(test_iter, model, device=device))
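
The comment above notes that the contents of this directory are zipped and uploaded to the S3 output path as output.tar.gz; the notebooks in this PR then display accuracy.png and loss.png from a local `output/...` directory. A hedged sketch of the retrieval step in between, with bucket, key, and the local directory as placeholders (the usual key layout is `<output_path>/<training-job-name>/output/output.tar.gz`):

```python
# Sketch (assumption, not shown in this diff): download and unpack the job's output
# artifact so the notebook's Image(...) cells can find the PNGs locally.
import os
import tarfile

import boto3

bucket = 'my-sagemaker-bucket'                                    # placeholder
key = 'chainer-output/<training-job-name>/output/output.tar.gz'   # placeholder

boto3.client('s3').download_file(bucket, key, 'output.tar.gz')

local_dir = 'output/single_machine_cifar'  # matches the path the notebook displays from
os.makedirs(local_dir, exist_ok=True)
with tarfile.open('output.tar.gz') as tar:
    tar.extractall(local_dir)
```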

@@ -124,7 +124,7 @@
"* `SM_MODEL_DIR`: A string representing the path to the directory to write model artifacts to.\n",
" These artifacts are uploaded to S3 for model hosting.\n",
"* `SM_NUM_GPUS`: An integer representing the number of GPUs available to the host.\n",
"* `SM_OUTPUT_DATA_DIR`: A string representing the filesystem path to write output artifacts to. Output artifacts may\n",
"* `SM_OUTPUT_DIR`: A string representing the filesystem path to write output artifacts to. Output artifacts may\n",
" include checkpoints, graphs, and other files to save, not including model artifacts. These artifacts are compressed\n",
" and uploaded to S3 to the same S3 prefix as the model artifacts.\n",
"\n",
@@ -289,10 +289,10 @@
"from IPython.display import Image\n",
"from IPython.display import display\n",
"\n",
"accuracy_graph = Image(filename=\"output/sentiment/algo-1/accuracy.png\",\n",
"accuracy_graph = Image(filename=\"output/sentiment/accuracy.png\",\n",
" width=800,\n",
" height=800)\n",
"loss_graph = Image(filename=\"output/sentiment/algo-1/loss.png\",\n",
"loss_graph = Image(filename=\"output/sentiment/loss.png\",\n",
" width=800,\n",
" height=800)\n",
"\n",
@@ -409,7 +409,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
"version": "3.6.5"
},
"notice": "Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws. amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License."
},

@@ -38,7 +38,7 @@
parser.add_argument('--model-type', type=str, default='rnn')

# Data, model, and output directories. These are required.
parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
parser.add_argument('--output-dir', type=str, default=os.environ['SM_OUTPUT_DIR'])
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])
parser.add_argument('--test', type=str, default=os.environ['SM_CHANNEL_TEST'])
@@ -103,7 +103,11 @@
test_iter = chainer.iterators.SerialIterator(test, args.batch_size, repeat=False, shuffle=False)
updater = training.updater.StandardUpdater(train_iter, optimizer, converter=convert_seq, device=device)

trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.output_data_dir)
# SageMaker saves the return value of train() in the `save` function in the resulting
# model artifact model.tar.gz, and the contents of `output_data_dir` in the output
# artifact output.tar.gz.
output_data_dir = os.path.join(args.output_dir, 'data')
trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=output_data_dir)

# Evaluate the model with the test dataset for each epoch
trainer.extend(extensions.Evaluator(test_iter, model, converter=convert_seq, device=device))
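
Further down, the script loads `best_model.npz` from `output_data_dir` and then deletes it before saving the final model artifact. A hedged guess at how that file gets there in the first place: a `snapshot_object` extension triggered on the best validation accuracy (the exact trigger in the full script lies outside this hunk):

```python
# Sketch (hedged): save the model into the trainer's `out` directory (output_data_dir)
# as best_model.npz whenever validation accuracy reaches a new maximum. Assumes
# `from chainer import training` and `from chainer.training import extensions`,
# as used elsewhere in this script.
trainer.extend(
    extensions.snapshot_object(model, 'best_model.npz'),
    trigger=training.triggers.MaxValueTrigger('validation/main/accuracy'))
```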
@@ -138,15 +142,11 @@
# Run the training
trainer.run()

# SageMaker saves the return value of train() in the `save` function in the resulting
# model artifact model.tar.gz, and the contents of `output_data_dir` in the output
# artifact output.tar.gz.

# load the best model
serializers.load_npz(os.path.join(args.output_data_dir, 'best_model.npz'), model)
serializers.load_npz(os.path.join(output_data_dir, 'best_model.npz'), model)

# remove the best model from output artifacts (since it will be saved as a model artifact)
os.remove(os.path.join(args.output_data_dir, 'best_model.npz'))
os.remove(os.path.join(output_data_dir, 'best_model.npz'))

serializers.save_npz(os.path.join(args.model_dir, 'my_model.npz'), model)
with open(os.path.join(args.model_dir, 'vocab.json'), 'w') as f: