aws
diff --git a/‎sagemaker-pipelines/index.rst
Lines changed: 2 additions & 1 deletion b/‎sagemaker-pipelines/index.rst
Lines changed: 2 additions & 1 deletion
diff --git a/‎sagemaker-pipelines/img/pipeline-1.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-1.png b/‎sagemaker-pipelines/img/pipeline-1.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-1.png
diff --git a/‎sagemaker-pipelines/img/pipeline-2.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-2.png b/‎sagemaker-pipelines/img/pipeline-2.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-2.png
diff --git a/‎sagemaker-pipelines/img/pipeline-3.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-3.png b/‎sagemaker-pipelines/img/pipeline-3.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-3.png
diff --git a/‎sagemaker-pipelines/img/pipeline-4.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-4.png b/‎sagemaker-pipelines/img/pipeline-4.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-4.png
diff --git a/‎sagemaker-pipelines/img/pipeline-5.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-5.png b/‎sagemaker-pipelines/img/pipeline-5.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-5.png
diff --git a/‎sagemaker-pipelines/img/pipeline-6.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-6.png b/‎sagemaker-pipelines/img/pipeline-6.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-6.png
diff --git a/‎sagemaker-pipelines/img/pipeline-7.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-7.png b/‎sagemaker-pipelines/img/pipeline-7.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-7.png
diff --git a/‎sagemaker-pipelines/img/pipeline-full.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-full.png b/‎sagemaker-pipelines/img/pipeline-full.png renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/img/pipeline-full.png
diff --git a/‎sagemaker-pipelines/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb b/‎sagemaker-pipelines/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb renamed to ‎sagemaker-pipelines/tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform.ipynb
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/approve_model.png
50.1 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/approve_model.png
50.1 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/clone_repos.png
50.1 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/clone_repos.png
50.1 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/create_project.png
71.1 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/create_project.png
71.1 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/dag.png
113 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/dag.png
113 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/deep_dive.png
327 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/deep_dive.png
327 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/enable_projects.png
173 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/enable_projects.png
173 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/endpoints.png
44.3 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/endpoints.png
44.3 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/execute_pipeline.png
27.8 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/execute_pipeline.png
27.8 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/git_push.png
145 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/git_push.png
145 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/model_metrics.png
88.4 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/model_metrics.png
88.4 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/repo_directory.png
35.7 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/repo_directory.png
35.7 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/select_projects.png
32.1 KB b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/img/select_projects.png
32.1 KB
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/modelbuild/codebuild-buildspec.yml
Lines changed: 20 additions & 0 deletions b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/modelbuild/codebuild-buildspec.yml
Lines changed: 20 additions & 0 deletions
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/modelbuild/pipelines/customer_churn/evaluate.py
Lines changed: 76 additions & 0 deletions b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/modelbuild/pipelines/customer_churn/evaluate.py
Lines changed: 76 additions & 0 deletions
diff --git a/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/modelbuild/pipelines/customer_churn/pipeline.py
Lines changed: 293 additions & 0 deletions b/‎sagemaker-pipelines/tabular/customizing_build_train_deploy_project/modelbuild/pipelines/customer_churn/pipeline.py
Lines changed: 293 additions & 0 deletions
@@ -6,4 +6,5 @@ Amazon SageMaker Model Building Pipelines is a tool for building machine learnin
 .. toctree::
    :maxdepth: 1
 
-   sagemaker-pipelines-preprocess-train-evaluate-batch-transform
+   tabular/abalone_build_train_deploy/sagemaker-pipelines-preprocess-train-evaluate-batch-transform
+   tabular/customizing_build_train_deploy_project/modelbuild/sagemaker-pipelines-customized-project
@@ -0,0 +1,20 @@
+# CodeBuild buildspec for the model-build repository: installs this package
+# and then invokes `run-pipeline` for the customer churn pipeline module.
+version: 0.2
+
+phases:
+  install:
+    runtime-versions:
+      python: 3.8
+    commands:
+      # Installs the package in the current directory together with awscli.
+      # NOTE(review): `run-pipeline` is presumably a console script provided by
+      # this package -- confirm against its setup.py.
+      - pip install --upgrade --force-reinstall . awscli
+  
+  build:
+    commands:
+      - export PYTHONUNBUFFERED=TRUE
+      # Combined "<project name>-<project id>" string; it is passed below as the
+      # pipeline name, the model package group name and the base job prefix.
+      - export SAGEMAKER_PROJECT_NAME_ID="${SAGEMAKER_PROJECT_NAME}-${SAGEMAKER_PROJECT_ID}"
+      # --kwargs carries a JSON object whose keys match the keyword arguments of
+      # get_pipeline() in pipelines/customer_churn/pipeline.py; --tags carries
+      # the project name and project id as SageMaker tags.
+      - |
+        run-pipeline --module-name pipelines.customer_churn.pipeline \
+          --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \
+          --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \
+          --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\"}"
+      - echo "Create/Update of the SageMaker Pipeline and execution completed."
+ 
@@ -0,0 +1,76 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Evaluation script for measuring model accuracy."""
+
+import json
+import os
+import tarfile
+import logging
+import pickle
+
+import pandas as pd
+import xgboost
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logger.addHandler(logging.StreamHandler())
+
+# May need to import additional metrics depending on what you are measuring.
+# See https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html
+from sklearn.metrics import classification_report, roc_auc_score, accuracy_score
+
+
+if __name__ == "__main__":
+    model_path = "/opt/ml/processing/model/model.tar.gz"
+    with tarfile.open(model_path) as tar:
+        tar.extractall(path="..")
+
+    logger.debug("Loading xgboost model.")
+    model = pickle.load(open("xgboost-model", "rb"))
+
+    print("Loading test input data")
+    test_path = "/opt/ml/processing/test/test.csv"
+    df = pd.read_csv(test_path, header=None)
+
+    logger.debug("Reading test data.")
+    y_test = df.iloc[:, 0].to_numpy()
+    df.drop(df.columns[0], axis=1, inplace=True)
+    X_test = xgboost.DMatrix(df.values)
+
+    logger.info("Performing predictions against test data.")
+    predictions = model.predict(X_test)
+
+    print("Creating classification evaluation report")
+    acc = accuracy_score(y_test, predictions.round())
+    auc = roc_auc_score(y_test, predictions.round())
+
+    # The metrics reported can change based on the model used, but it must be a specific name per (https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html)
+    report_dict = {
+        "binary_classification_metrics": {
+            "accuracy": {
+                "value": acc,
+                "standard_deviation": "NaN",
+            },
+            "auc": {"value": auc, "standard_deviation": "NaN"},
+        },
+    }
+
+    print("Classification report:\n{}".format(report_dict))
+
+    evaluation_output_path = os.path.join(
+        "/opt/ml/processing/evaluation", "evaluation.json"
+    )
+    print("Saving classification report to {}".format(evaluation_output_path))
+
+    with open(evaluation_output_path, "w") as f:
+        f.write(json.dumps(report_dict))
@@ -0,0 +1,293 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Example workflow pipeline script for CustomerChurn pipeline.
+
+                                               . -RegisterModel
+                                              .
+    Process-> Train -> Evaluate -> Condition .
+                                              .
+                                               . -(stop)
+
+Implements a get_pipeline(**kwargs) method.
+"""
+
+import os
+
+import boto3
+import sagemaker
+import sagemaker.session
+
+from sagemaker.estimator import Estimator
+from sagemaker.inputs import TrainingInput
+from sagemaker.processing import (
+    ProcessingInput,
+    ProcessingOutput,
+    ScriptProcessor,
+)
+from sagemaker.sklearn.processing import SKLearnProcessor
+from sagemaker.workflow.conditions import (
+    ConditionGreaterThanOrEqualTo,
+)
+from sagemaker.workflow.condition_step import (
+    ConditionStep,
+    JsonGet,
+)
+from sagemaker.model_metrics import (
+    MetricsSource,
+    ModelMetrics,
+)
+from sagemaker.workflow.parameters import (
+    ParameterInteger,
+    ParameterString,
+)
+from sagemaker.workflow.pipeline import Pipeline
+from sagemaker.workflow.properties import PropertyFile
+from sagemaker.workflow.steps import (
+    ProcessingStep,
+    TrainingStep,
+)
+from sagemaker.workflow.step_collections import RegisterModel
+
+
+BASE_DIR = os.path.dirname(os.path.realpath(__file__))
+
+
+def get_session(region, default_bucket):
+    """Gets the sagemaker session based on the region.
+
+    Args:
+        region: the aws region to start the session
+        default_bucket: the bucket to use for storing the artifacts
+
+    Returns:
+        `sagemaker.session.Session instance
+    """
+
+    boto_session = boto3.Session(region_name=region)
+
+    sagemaker_client = boto_session.client("sagemaker")
+    runtime_client = boto_session.client("sagemaker-runtime")
+    return sagemaker.session.Session(
+        boto_session=boto_session,
+        sagemaker_client=sagemaker_client,
+        sagemaker_runtime_client=runtime_client,
+        default_bucket=default_bucket,
+    )
+
+
+def get_pipeline(
+    region,
+    role=None,
+    default_bucket=None,
+    model_package_group_name="CustomerChurnPackageGroup",  # Choose any name
+    pipeline_name="CustomerChurnDemo-p-ewf8t7lvhivm",  # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
+    base_job_prefix="CustomerChurn",  # Choose any name
+):
+    """Gets a SageMaker ML Pipeline instance working with on CustomerChurn data.
+
+    Args:
+        region: AWS region to create and run the pipeline.
+        role: IAM role to create and run steps and pipeline.
+        default_bucket: the bucket to use for storing the artifacts
+
+    Returns:
+        an instance of a pipeline
+    """
+    sagemaker_session = get_session(region, default_bucket)
+    if role is None:
+        role = sagemaker.session.get_execution_role(sagemaker_session)
+
+    # Parameters for pipeline execution
+    processing_instance_count = ParameterInteger(
+        name="ProcessingInstanceCount", default_value=1
+    )
+    processing_instance_type = ParameterString(
+        name="ProcessingInstanceType", default_value="ml.m5.xlarge"
+    )
+    training_instance_type = ParameterString(
+        name="TrainingInstanceType", default_value="ml.m5.xlarge"
+    )
+    model_approval_status = ParameterString(
+        name="ModelApprovalStatus",
+        default_value="PendingManualApproval",  # ModelApprovalStatus can be set to a default of "Approved" if you don't want manual approval.
+    )
+    input_data = ParameterString(
+        name="InputDataUrl",
+        default_value=f"s3://sm-pipelines-demo-data-123456789/churn.txt",  # Change this to point to the s3 location of your raw input data.
+    )
+
+    # Processing step for feature engineering
+    sklearn_processor = SKLearnProcessor(
+        framework_version="0.23-1",
+        instance_type=processing_instance_type,
+        instance_count=processing_instance_count,
+        base_job_name=f"{base_job_prefix}/sklearn-CustomerChurn-preprocess",  # choose any name
+        sagemaker_session=sagemaker_session,
+        role=role,
+    )
+    step_process = ProcessingStep(
+        name="CustomerChurnProcess",  # choose any name
+        processor=sklearn_processor,
+        outputs=[
+            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
+            ProcessingOutput(
+                output_name="validation", source="/opt/ml/processing/validation"
+            ),
+            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
+        ],
+        code=os.path.join(BASE_DIR, "preprocess.py"),
+        job_arguments=["--input-data", input_data],
+    )
+
+    # Training step for generating model artifacts
+    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/CustomerChurnTrain"
+    image_uri = sagemaker.image_uris.retrieve(
+        framework="xgboost",  # we are using the Sagemaker built in xgboost algorithm
+        region=region,
+        version="1.0-1",
+        py_version="py3",
+        instance_type=training_instance_type,
+    )
+    xgb_train = Estimator(
+        image_uri=image_uri,
+        instance_type=training_instance_type,
+        instance_count=1,
+        output_path=model_path,
+        base_job_name=f"{base_job_prefix}/CustomerChurn-train",
+        sagemaker_session=sagemaker_session,
+        role=role,
+    )
+    xgb_train.set_hyperparameters(
+        objective="binary:logistic",
+        num_round=50,
+        max_depth=5,
+        eta=0.2,
+        gamma=4,
+        min_child_weight=6,
+        subsample=0.7,
+        silent=0,
+    )
+    step_train = TrainingStep(
+        name="CustomerChurnTrain",
+        estimator=xgb_train,
+        inputs={
+            "train": TrainingInput(
+                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
+                    "train"
+                ].S3Output.S3Uri,
+                content_type="text/csv",
+            ),
+            "validation": TrainingInput(
+                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
+                    "validation"
+                ].S3Output.S3Uri,
+                content_type="text/csv",
+            ),
+        },
+    )
+
+    # Processing step for evaluation
+    script_eval = ScriptProcessor(
+        image_uri=image_uri,
+        command=["python3"],
+        instance_type=processing_instance_type,
+        instance_count=1,
+        base_job_name=f"{base_job_prefix}/script-CustomerChurn-eval",
+        sagemaker_session=sagemaker_session,
+        role=role,
+    )
+    evaluation_report = PropertyFile(
+        name="EvaluationReport",
+        output_name="evaluation",
+        path="evaluation.json",
+    )
+    step_eval = ProcessingStep(
+        name="CustomerChurnEval",
+        processor=script_eval,
+        inputs=[
+            ProcessingInput(
+                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
+                destination="/opt/ml/processing/model",
+            ),
+            ProcessingInput(
+                source=step_process.properties.ProcessingOutputConfig.Outputs[
+                    "test"
+                ].S3Output.S3Uri,
+                destination="/opt/ml/processing/test",
+            ),
+        ],
+        outputs=[
+            ProcessingOutput(
+                output_name="evaluation", source="/opt/ml/processing/evaluation"
+            ),
+        ],
+        code=os.path.join(BASE_DIR, "evaluate.py"),
+        property_files=[evaluation_report],
+    )
+
+    # Register model step that will be conditionally executed
+    model_metrics = ModelMetrics(
+        model_statistics=MetricsSource(
+            s3_uri="{}/evaluation.json".format(
+                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
+                    "S3Uri"
+                ]
+            ),
+            content_type="application/json",
+        )
+    )
+
+    # Register model step that will be conditionally executed
+    step_register = RegisterModel(
+        name="CustomerChurnRegisterModel",
+        estimator=xgb_train,
+        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
+        content_types=["text/csv"],
+        response_types=["text/csv"],
+        inference_instances=["ml.t2.medium", "ml.m5.large"],
+        transform_instances=["ml.m5.large"],
+        model_package_group_name=model_package_group_name,
+        approval_status=model_approval_status,
+        model_metrics=model_metrics,
+    )
+
+    # Condition step for evaluating model quality and branching execution
+    cond_lte = ConditionGreaterThanOrEqualTo(  # You can change the condition here
+        left=JsonGet(
+            step=step_eval,
+            property_file=evaluation_report,
+            json_path="binary_classification_metrics.accuracy.value",  # This should follow the structure of your report_dict defined in the evaluate.py file.
+        ),
+        right=0.8,  # You can change the threshold here
+    )
+    step_cond = ConditionStep(
+        name="CustomerChurnAccuracyCond",
+        conditions=[cond_lte],
+        if_steps=[step_register],
+        else_steps=[],
+    )
+
+    # Pipeline instance
+    pipeline = Pipeline(
+        name=pipeline_name,
+        parameters=[
+            processing_instance_type,
+            processing_instance_count,
+            training_instance_type,
+            model_approval_status,
+            input_data,
+        ],
+        steps=[step_process, step_train, step_eval, step_cond],
+        sagemaker_session=sagemaker_session,
+    )
+    return pipeline