|
44 | 44 | "metadata": {},
|
45 | 45 | "outputs": [],
|
46 | 46 | "source": [
|
47 |
| - "!{sys.executable} -m pip install sagemaker-experiments" |
| 47 | + "!{sys.executable} -m pip install sagemaker-experiments==0.1.24" |
48 | 48 | ]
|
49 | 49 | },
|
50 | 50 | {
|
|
60 | 60 | "metadata": {},
|
61 | 61 | "outputs": [],
|
62 | 62 | "source": [
|
63 |
| - "!{sys.executable} -m pip install torch\n", |
64 |
| - "!{sys.executable} -m pip install torchvision" |
| 63 | + "# The PyTorch version must be the same in both the notebook instance and the training job container\n", |
| 64 | + "# https://github.com/pytorch/pytorch/issues/25214\n", |
| 65 | + "!{sys.executable} -m pip install torch==1.1.0\n", |
| 66 | + "!{sys.executable} -m pip install torchvision==0.3.0\n", |
| 67 | + "!{sys.executable} -m pip install pillow==6.2.2" |
65 | 68 | ]
|
66 | 69 | },
|
67 | 70 | {
|
|
73 | 76 | },
|
74 | 77 | {
|
75 | 78 | "cell_type": "code",
|
76 |
| - "execution_count": null, |
| 79 | + "execution_count": 1, |
77 | 80 | "metadata": {},
|
78 | 81 | "outputs": [],
|
79 | 82 | "source": [
|
|
82 | 85 | "import boto3\n",
|
83 | 86 | "import numpy as np\n",
|
84 | 87 | "import pandas as pd\n",
|
85 |
| - "%config InlineBackend.figure_format = 'retina'\n", |
| 88 | + "from IPython.display import set_matplotlib_formats\n", |
86 | 89 | "from matplotlib import pyplot as plt\n",
|
87 | 90 | "from torchvision import datasets, transforms\n",
|
88 | 91 | "\n",
|
|
94 | 97 | "from smexperiments.experiment import Experiment\n",
|
95 | 98 | "from smexperiments.trial import Trial\n",
|
96 | 99 | "from smexperiments.trial_component import TrialComponent\n",
|
97 |
| - "from smexperiments.tracker import Tracker" |
| 100 | + "from smexperiments.tracker import Tracker\n", |
| 101 | + "\n", |
| 102 | + "set_matplotlib_formats('retina')" |
98 | 103 | ]
|
99 | 104 | },
|
100 | 105 | {
|
|
307 | 312 | " # all input configurations, parameters, and metrics specified in estimator \n",
|
308 | 313 | " # definition are automatically tracked\n",
|
309 | 314 | " estimator = PyTorch(\n",
|
| 315 | + " py_version='py3',\n", |
310 | 316 | " entry_point='./mnist.py',\n",
|
311 | 317 | " role=role,\n",
|
312 | 318 | " sagemaker_session=sagemaker.Session(sagemaker_client=sm),\n",
|
313 | 319 | " framework_version='1.1.0',\n",
|
314 |
| - " train_instance_count=1,\n", |
315 |
| - " train_instance_type='ml.c4.xlarge',\n", |
| 320 | + " instance_count=1,\n", |
| 321 | + " instance_type='ml.c4.xlarge',\n", |
316 | 322 | " hyperparameters={\n",
|
317 | 323 | " 'epochs': 2,\n",
|
318 | 324 | " 'backend': 'gloo',\n",
|
|
470 | 476 | " model_data, \n",
|
471 | 477 | " role, \n",
|
472 | 478 | " './mnist.py', \n",
|
| 479 | + " py_version='py3',\n", |
473 | 480 | " env=env, \n",
|
474 | 481 | " sagemaker_session=sagemaker.Session(sagemaker_client=sm),\n",
|
475 | 482 | " framework_version='1.1.0',\n",
|
|
497 | 504 | "metadata": {},
|
498 | 505 | "outputs": [],
|
499 | 506 | "source": [
|
500 |
| - "predictor.delete_endpoint()\n", |
501 |
| - "\n", |
502 |
| - "def cleanup(experiment):\n", |
503 |
| - " for trial_summary in experiment.list_trials():\n", |
504 |
| - " trial = Trial.load(sagemaker_boto_client=sm, trial_name=trial_summary.trial_name)\n", |
505 |
| - " for trial_component_summary in trial.list_trial_components():\n", |
506 |
| - " tc = TrialComponent.load(\n", |
507 |
| - " sagemaker_boto_client=sm,\n", |
508 |
| - " trial_component_name=trial_component_summary.trial_component_name)\n", |
509 |
| - " trial.remove_trial_component(tc)\n", |
510 |
| - " try:\n", |
511 |
| - " # comment out to keep trial components\n", |
512 |
| - " tc.delete()\n", |
513 |
| - " except:\n", |
514 |
| - " # tc is associated with another trial\n", |
515 |
| - " continue\n", |
516 |
| - " # to prevent throttling\n", |
517 |
| - " time.sleep(.5)\n", |
518 |
| - " trial.delete()\n", |
519 |
| - " experiment.delete()\n", |
520 |
| - "\n", |
521 |
| - "cleanup(mnist_experiment)" |
| 507 | + "predictor.delete_endpoint()" |
522 | 508 | ]
|
523 | 509 | },
|
524 | 510 | {
|
525 | 511 | "cell_type": "code",
|
526 | 512 | "execution_count": null,
|
527 | 513 | "metadata": {},
|
528 | 514 | "outputs": [],
|
529 |
| - "source": [] |
| 515 | + "source": [ |
| 516 | + "mnist_experiment.delete_all(action='--force')" |
| 517 | + ] |
| 518 | + }, |
| 519 | + { |
| 520 | + "cell_type": "markdown", |
| 521 | + "metadata": {}, |
| 522 | + "source": [ |
| 523 | + "## Contact\n", |
| 524 | + "Submit any questions or issues to https://github.com/aws/sagemaker-experiments/issues or mention @aws/sagemakerexperimentsadmin." |
| 525 | + ] |
530 | 526 | }
|
531 | 527 | ],
|
532 | 528 | "metadata": {
|
| 529 | + "instance_type": "ml.t3.medium", |
533 | 530 | "kernelspec": {
|
534 | 531 | "display_name": "Python 3 (Data Science)",
|
535 | 532 | "language": "python",
|
|
0 commit comments