|
49 | 49 | "cell_type": "code",
|
50 | 50 | "execution_count": null,
|
51 | 51 | "metadata": {
|
52 |
| - "collapsed": true, |
53 | 52 | "isConfigCell": true,
|
54 | 53 | "tags": [
|
55 | 54 | "parameters"
|
|
80 | 79 | {
|
81 | 80 | "cell_type": "code",
|
82 | 81 | "execution_count": null,
|
83 |
| - "metadata": { |
84 |
| - "collapsed": true |
85 |
| - }, |
| 82 | + "metadata": {}, |
86 | 83 | "outputs": [],
|
87 | 84 | "source": [
|
88 | 85 | "import pandas as pd\n",
|
|
114 | 111 | {
|
115 | 112 | "cell_type": "code",
|
116 | 113 | "execution_count": null,
|
117 |
| - "metadata": { |
118 |
| - "collapsed": true |
119 |
| - }, |
| 114 | + "metadata": {}, |
120 | 115 | "outputs": [],
|
121 | 116 | "source": [
|
122 | 117 | "!wget http://dataminingconsultant.com/DKD2e_data_sets.zip\n",
|
|
126 | 121 | {
|
127 | 122 | "cell_type": "code",
|
128 | 123 | "execution_count": null,
|
129 |
| - "metadata": { |
130 |
| - "collapsed": true |
131 |
| - }, |
| 124 | + "metadata": {}, |
132 | 125 | "outputs": [],
|
133 | 126 | "source": [
|
134 | 127 | "churn = pd.read_csv('./Data sets/churn.txt')\n",
|
|
166 | 159 | {
|
167 | 160 | "cell_type": "code",
|
168 | 161 | "execution_count": null,
|
169 |
| - "metadata": { |
170 |
| - "collapsed": true |
171 |
| - }, |
| 162 | + "metadata": {}, |
172 | 163 | "outputs": [],
|
173 | 164 | "source": [
|
174 | 165 | "# Frequency tables for each categorical feature\n",
|
|
195 | 186 | {
|
196 | 187 | "cell_type": "code",
|
197 | 188 | "execution_count": null,
|
198 |
| - "metadata": { |
199 |
| - "collapsed": true |
200 |
| - }, |
| 189 | + "metadata": {}, |
201 | 190 | "outputs": [],
|
202 | 191 | "source": [
|
203 | 192 | "churn = churn.drop('Phone', axis=1)\n",
|
|
214 | 203 | {
|
215 | 204 | "cell_type": "code",
|
216 | 205 | "execution_count": null,
|
217 |
| - "metadata": { |
218 |
| - "collapsed": true |
219 |
| - }, |
| 206 | + "metadata": {}, |
220 | 207 | "outputs": [],
|
221 | 208 | "source": [
|
222 | 209 | "for column in churn.select_dtypes(include=['object']).columns:\n",
|
|
246 | 233 | {
|
247 | 234 | "cell_type": "code",
|
248 | 235 | "execution_count": null,
|
249 |
| - "metadata": { |
250 |
| - "collapsed": true |
251 |
| - }, |
| 236 | + "metadata": {}, |
252 | 237 | "outputs": [],
|
253 | 238 | "source": [
|
254 | 239 | "display(churn.corr())\n",
|
|
266 | 251 | {
|
267 | 252 | "cell_type": "code",
|
268 | 253 | "execution_count": null,
|
269 |
| - "metadata": { |
270 |
| - "collapsed": true |
271 |
| - }, |
| 254 | + "metadata": {}, |
272 | 255 | "outputs": [],
|
273 | 256 | "source": [
|
274 | 257 | "churn = churn.drop(['Day Charge', 'Eve Charge', 'Night Charge', 'Intl Charge'], axis=1)"
|
|
290 | 273 | {
|
291 | 274 | "cell_type": "code",
|
292 | 275 | "execution_count": null,
|
293 |
| - "metadata": { |
294 |
| - "collapsed": true |
295 |
| - }, |
| 276 | + "metadata": {}, |
296 | 277 | "outputs": [],
|
297 | 278 | "source": [
|
298 | 279 | "model_data = pd.get_dummies(churn)\n",
|
|
309 | 290 | {
|
310 | 291 | "cell_type": "code",
|
311 | 292 | "execution_count": null,
|
312 |
| - "metadata": { |
313 |
| - "collapsed": true |
314 |
| - }, |
| 293 | + "metadata": {}, |
315 | 294 | "outputs": [],
|
316 | 295 | "source": [
|
317 | 296 | "train_data, validation_data, test_data = np.split(model_data.sample(frac=1, random_state=1729), [int(0.7 * len(model_data)), int(0.9 * len(model_data))])\n",
|
|
329 | 308 | {
|
330 | 309 | "cell_type": "code",
|
331 | 310 | "execution_count": null,
|
332 |
| - "metadata": { |
333 |
| - "collapsed": true |
334 |
| - }, |
| 311 | + "metadata": {}, |
335 | 312 | "outputs": [],
|
336 | 313 | "source": [
|
337 | 314 | "boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')\n",
|
|
351 | 328 | {
|
352 | 329 | "cell_type": "code",
|
353 | 330 | "execution_count": null,
|
354 |
| - "metadata": { |
355 |
| - "collapsed": true |
356 |
| - }, |
| 331 | + "metadata": {}, |
357 | 332 | "outputs": [],
|
358 | 333 | "source": [
|
359 | 334 | "from sagemaker.amazon.amazon_estimator import get_image_uri\n",
|
360 |
| - "container = get_image_uri(boto3.Session().region_name, 'xgboost')" |
| 335 | + "container = sagemaker.image_uris.retrieve('xgboost', boto3.Session().region_name, '1')\n", |
| 336 | + "display(container)" |
361 | 337 | ]
|
362 | 338 | },
|
363 | 339 | {
|
|
370 | 346 | {
|
371 | 347 | "cell_type": "code",
|
372 | 348 | "execution_count": null,
|
373 |
| - "metadata": { |
374 |
| - "collapsed": true |
375 |
| - }, |
| 349 | + "metadata": {}, |
376 | 350 | "outputs": [],
|
377 | 351 | "source": [
|
378 |
| - "s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket, prefix), content_type='csv')\n", |
379 |
| - "s3_input_validation = sagemaker.s3_input(s3_data='s3://{}/{}/validation/'.format(bucket, prefix), content_type='csv')" |
| 352 | + "s3_input_train = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/train'.format(bucket, prefix), content_type='csv')\n", |
| 353 | + "s3_input_validation = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/validation/'.format(bucket, prefix), content_type='csv')" |
380 | 354 | ]
|
381 | 355 | },
|
382 | 356 | {
|
|
396 | 370 | {
|
397 | 371 | "cell_type": "code",
|
398 | 372 | "execution_count": null,
|
399 |
| - "metadata": { |
400 |
| - "collapsed": true |
401 |
| - }, |
| 373 | + "metadata": {}, |
402 | 374 | "outputs": [],
|
403 | 375 | "source": [
|
404 | 376 | "sess = sagemaker.Session()\n",
|
405 | 377 | "\n",
|
406 | 378 | "xgb = sagemaker.estimator.Estimator(container,\n",
|
407 | 379 | " role, \n",
|
408 |
| - " train_instance_count=1, \n", |
409 |
| - " train_instance_type='ml.m4.xlarge',\n", |
| 380 | + " instance_count=1, \n", |
| 381 | + " instance_type='ml.m4.xlarge',\n", |
410 | 382 | " output_path='s3://{}/{}/output'.format(bucket, prefix),\n",
|
411 | 383 | " sagemaker_session=sess)\n",
|
412 | 384 | "xgb.set_hyperparameters(max_depth=5,\n",
|
|
437 | 409 | "outputs": [],
|
438 | 410 | "source": [
|
439 | 411 | "compiled_model = xgb\n",
|
440 |
| - "if xgb.create_model().check_neo_region(boto3.Session().region_name) is False:\n", |
441 |
| - " print('Neo is not currently supported in', boto3.Session().region_name)\n", |
442 |
| - "else:\n", |
443 |
| - " output_path = '/'.join(xgb.output_path.split('/')[:-1])\n", |
444 |
| - " compiled_model = xgb.compile_model(target_instance_family='ml_m4', \n", |
445 |
| - " input_shape={'data': [1, 69]},\n", |
446 |
| - " role=role,\n", |
447 |
| - " framework='xgboost',\n", |
448 |
| - " framework_version='0.7',\n", |
449 |
| - " output_path=output_path)\n", |
450 |
| - " compiled_model.name = 'deployed-xgboost-customer-churn'\n", |
451 |
| - " compiled_model.image = get_image_uri(sess.boto_region_name, 'xgboost-neo', repo_version='latest')" |
| 412 | + "output_path = '/'.join(xgb.output_path.split('/')[:-1])\n", |
| 413 | + "compiled_model = xgb.compile_model(target_instance_family='ml_m4', \n", |
| 414 | + " input_shape={'data': [1, 69]},\n", |
| 415 | + " role=role,\n", |
| 416 | + " framework='xgboost',\n", |
| 417 | + " framework_version='latest',\n", |
| 418 | + " output_path=output_path)\n", |
| 419 | + "compiled_model.name = 'deployed-xgboost-customer-churn'\n", |
| 420 | + "compiled_model.image = get_image_uri(sess.boto_region_name, 'xgboost-neo', repo_version='latest')" |
452 | 421 | ]
|
453 | 422 | },
|
454 | 423 | {
|
|
464 | 433 | {
|
465 | 434 | "cell_type": "code",
|
466 | 435 | "execution_count": null,
|
467 |
| - "metadata": { |
468 |
| - "collapsed": true |
469 |
| - }, |
| 436 | + "metadata": {}, |
470 | 437 | "outputs": [],
|
471 | 438 | "source": [
|
472 |
| - "xgb_predictor = compiled_model.deploy(initial_instance_count = 1, instance_type = 'ml.m4.xlarge')" |
| 439 | + "xgb_predictor = compiled_model.deploy(\n", |
| 440 | + " initial_instance_count = 1, \n", |
| 441 | + " instance_type = 'ml.m4.xlarge',\n", |
| 442 | + " serializer=sagemaker.serializers.CSVSerializer())" |
473 | 443 | ]
|
474 | 444 | },
|
475 | 445 | {
|
|
481 | 451 | "Now that we have a hosted endpoint running, we can make real-time predictions from our model very easily, simply by making an HTTP POST request. But first, we'll need to set up serializers and deserializers for passing our `test_data` NumPy arrays to the model behind the endpoint."
|
482 | 452 | ]
|
483 | 453 | },
|
484 |
| - { |
485 |
| - "cell_type": "code", |
486 |
| - "execution_count": null, |
487 |
| - "metadata": { |
488 |
| - "collapsed": true |
489 |
| - }, |
490 |
| - "outputs": [], |
491 |
| - "source": [ |
492 |
| - "xgb_predictor.content_type = 'text/csv'\n", |
493 |
| - "xgb_predictor.serializer = csv_serializer\n", |
494 |
| - "xgb_predictor.deserializer = None" |
495 |
| - ] |
496 |
| - }, |
497 | 454 | {
|
498 | 455 | "cell_type": "markdown",
|
499 | 456 | "metadata": {},
|
|
509 | 466 | {
|
510 | 467 | "cell_type": "code",
|
511 | 468 | "execution_count": null,
|
512 |
| - "metadata": { |
513 |
| - "collapsed": true |
514 |
| - }, |
| 469 | + "metadata": {}, |
515 | 470 | "outputs": [],
|
516 | 471 | "source": [
|
517 | 472 | "def predict(data, rows=500):\n",
|
|
535 | 490 | {
|
536 | 491 | "cell_type": "code",
|
537 | 492 | "execution_count": null,
|
538 |
| - "metadata": { |
539 |
| - "collapsed": true |
540 |
| - }, |
| 493 | + "metadata": {}, |
541 | 494 | "outputs": [],
|
542 | 495 | "source": [
|
543 | 496 | "pd.crosstab(index=test_data.iloc[:, 0], columns=np.round(predictions), rownames=['actual'], colnames=['predictions'])"
|
|
559 | 512 | {
|
560 | 513 | "cell_type": "code",
|
561 | 514 | "execution_count": null,
|
562 |
| - "metadata": { |
563 |
| - "collapsed": true |
564 |
| - }, |
| 515 | + "metadata": {}, |
565 | 516 | "outputs": [],
|
566 | 517 | "source": [
|
567 | 518 | "plt.hist(predictions)\n",
|
|
578 | 529 | {
|
579 | 530 | "cell_type": "code",
|
580 | 531 | "execution_count": null,
|
581 |
| - "metadata": { |
582 |
| - "collapsed": true |
583 |
| - }, |
| 532 | + "metadata": {}, |
584 | 533 | "outputs": [],
|
585 | 534 | "source": [
|
586 | 535 | "pd.crosstab(index=test_data.iloc[:, 0], columns=np.where(predictions > 0.3, 1, 0))"
|
|
629 | 578 | {
|
630 | 579 | "cell_type": "code",
|
631 | 580 | "execution_count": null,
|
632 |
| - "metadata": { |
633 |
| - "collapsed": true |
634 |
| - }, |
| 581 | + "metadata": {}, |
635 | 582 | "outputs": [],
|
636 | 583 | "source": [
|
637 | 584 | "cutoffs = np.arange(0.01, 1, 0.01)\n",
|
|
683 | 630 | {
|
684 | 631 | "cell_type": "code",
|
685 | 632 | "execution_count": null,
|
686 |
| - "metadata": { |
687 |
| - "collapsed": true |
688 |
| - }, |
| 633 | + "metadata": {}, |
689 | 634 | "outputs": [],
|
690 | 635 | "source": [
|
691 |
| - "sagemaker.Session().delete_endpoint(xgb_predictor.endpoint)" |
| 636 | + "xgb_predictor.delete_endpoint()" |
692 | 637 | ]
|
| 638 | + }, |
| 639 | + { |
| 640 | + "cell_type": "code", |
| 641 | + "execution_count": null, |
| 642 | + "metadata": {}, |
| 643 | + "outputs": [], |
| 644 | + "source": [] |
693 | 645 | }
|
694 | 646 | ],
|
695 | 647 | "metadata": {
|
696 | 648 | "celltoolbar": "Tags",
|
697 | 649 | "kernelspec": {
|
698 |
| - "display_name": "Python 3", |
| 650 | + "display_name": "conda_python3", |
699 | 651 | "language": "python",
|
700 |
| - "name": "python3" |
701 |
| - }, |
702 |
| - "language_info": { |
703 |
| - "codemirror_mode": { |
704 |
| - "name": "ipython", |
705 |
| - "version": 3 |
706 |
| - }, |
707 |
| - "file_extension": ".py", |
708 |
| - "mimetype": "text/x-python", |
709 |
| - "name": "python", |
710 |
| - "nbconvert_exporter": "python", |
711 |
| - "pygments_lexer": "ipython3", |
712 |
| - "version": "3.7.3" |
| 652 | + "name": "conda_python3" |
713 | 653 | },
|
714 | 654 | "notice": "Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the \"License\"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the \"license\" file accompanying this file. This file is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License."
|
715 | 655 | },
|
|
0 commit comments