|
44 | 44 | "metadata": {},
|
45 | 45 | "outputs": [],
|
46 | 46 | "source": [
|
47 |
| - "!{sys.executable} -m pip install sagemaker-experiments" |
| 47 | + "!{sys.executable} -m pip install sagemaker-experiments==0.1.24" |
48 | 48 | ]
|
49 | 49 | },
|
50 | 50 | {
|
|
60 | 60 | "metadata": {},
|
61 | 61 | "outputs": [],
|
62 | 62 | "source": [
|
63 |
| - "!{sys.executable} -m pip install torch\n", |
64 |
| - "!{sys.executable} -m pip install torchvision" |
| 63 | + "# The PyTorch version must be the same in both the notebook instance and the training job container\n", |
| 64 | + "# https://github.com/pytorch/pytorch/issues/25214\n", |
| 65 | + "!{sys.executable} -m pip install torch==1.1.0\n", |
| 66 | + "!{sys.executable} -m pip install torchvision==0.3.0\n", |
| 67 | + "!{sys.executable} -m pip install pillow==6.2.2\n", |
| 68 | + "!{sys.executable} -m pip install --upgrade sagemaker" |
65 | 69 | ]
|
66 | 70 | },
|
67 | 71 | {
|
|
73 | 77 | },
|
74 | 78 | {
|
75 | 79 | "cell_type": "code",
|
76 |
| - "execution_count": null, |
| 80 | + "execution_count": 1, |
77 | 81 | "metadata": {},
|
78 | 82 | "outputs": [],
|
79 | 83 | "source": [
|
|
82 | 86 | "import boto3\n",
|
83 | 87 | "import numpy as np\n",
|
84 | 88 | "import pandas as pd\n",
|
85 |
| - "%config InlineBackend.figure_format = 'retina'\n", |
| 89 | + "from IPython.display import set_matplotlib_formats\n", |
86 | 90 | "from matplotlib import pyplot as plt\n",
|
87 | 91 | "from torchvision import datasets, transforms\n",
|
88 | 92 | "\n",
|
|
94 | 98 | "from smexperiments.experiment import Experiment\n",
|
95 | 99 | "from smexperiments.trial import Trial\n",
|
96 | 100 | "from smexperiments.trial_component import TrialComponent\n",
|
97 |
| - "from smexperiments.tracker import Tracker" |
| 101 | + "from smexperiments.tracker import Tracker\n", |
| 102 | + "\n", |
| 103 | + "set_matplotlib_formats('retina')" |
98 | 104 | ]
|
99 | 105 | },
|
100 | 106 | {
|
|
307 | 313 | " # all input configurations, parameters, and metrics specified in estimator \n",
|
308 | 314 | " # definition are automatically tracked\n",
|
309 | 315 | " estimator = PyTorch(\n",
|
| 316 | + " py_version='py3',\n", |
310 | 317 | " entry_point='./mnist.py',\n",
|
311 | 318 | " role=role,\n",
|
312 | 319 | " sagemaker_session=sagemaker.Session(sagemaker_client=sm),\n",
|
313 | 320 | " framework_version='1.1.0',\n",
|
314 |
| - " train_instance_count=1,\n", |
315 |
| - " train_instance_type='ml.c4.xlarge',\n", |
| 321 | + " instance_count=1,\n", |
| 322 | + " instance_type='ml.c4.xlarge',\n", |
316 | 323 | " hyperparameters={\n",
|
317 | 324 | " 'epochs': 2,\n",
|
318 | 325 | " 'backend': 'gloo',\n",
|
|
470 | 477 | " model_data, \n",
|
471 | 478 | " role, \n",
|
472 | 479 | " './mnist.py', \n",
|
| 480 | + " py_version='py3',\n", |
473 | 481 | " env=env, \n",
|
474 | 482 | " sagemaker_session=sagemaker.Session(sagemaker_client=sm),\n",
|
475 | 483 | " framework_version='1.1.0',\n",
|
|
497 | 505 | "metadata": {},
|
498 | 506 | "outputs": [],
|
499 | 507 | "source": [
|
500 |
| - "predictor.delete_endpoint()\n", |
501 |
| - "\n", |
502 |
| - "def cleanup(experiment):\n", |
503 |
| - " for trial_summary in experiment.list_trials():\n", |
504 |
| - " trial = Trial.load(sagemaker_boto_client=sm, trial_name=trial_summary.trial_name)\n", |
505 |
| - " for trial_component_summary in trial.list_trial_components():\n", |
506 |
| - " tc = TrialComponent.load(\n", |
507 |
| - " sagemaker_boto_client=sm,\n", |
508 |
| - " trial_component_name=trial_component_summary.trial_component_name)\n", |
509 |
| - " trial.remove_trial_component(tc)\n", |
510 |
| - " try:\n", |
511 |
| - " # comment out to keep trial components\n", |
512 |
| - " tc.delete()\n", |
513 |
| - " except:\n", |
514 |
| - " # tc is associated with another trial\n", |
515 |
| - " continue\n", |
516 |
| - " # to prevent throttling\n", |
517 |
| - " time.sleep(.5)\n", |
518 |
| - " trial.delete()\n", |
519 |
| - " experiment.delete()\n", |
520 |
| - "\n", |
521 |
| - "cleanup(mnist_experiment)" |
| 508 | + "predictor.delete_endpoint()" |
522 | 509 | ]
|
523 | 510 | },
|
524 | 511 | {
|
525 | 512 | "cell_type": "code",
|
526 | 513 | "execution_count": null,
|
527 | 514 | "metadata": {},
|
528 | 515 | "outputs": [],
|
529 |
| - "source": [] |
| 516 | + "source": [ |
| 517 | + "mnist_experiment.delete_all(action='--force')" |
| 518 | + ] |
| 519 | + }, |
| 520 | + { |
| 521 | + "cell_type": "markdown", |
| 522 | + "metadata": {}, |
| 523 | + "source": [ |
| 524 | + "## Contact\n", |
| 525 | + "Submit any questions or issues at https://github.com/aws/sagemaker-experiments/issues, or mention @aws/sagemakerexperimentsadmin." |
| 526 | + ] |
530 | 527 | }
|
531 | 528 | ],
|
532 | 529 | "metadata": {
|
| 530 | + "instance_type": "ml.t3.medium", |
533 | 531 | "kernelspec": {
|
534 | 532 | "display_name": "Python 3 (Data Science)",
|
535 | 533 | "language": "python",
|
|
0 commit comments