Skip to content

feature: network isolation mode for xgboost #2626

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Oct 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/sagemaker/xgboost/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,16 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
)

deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
self._upload_code(deploy_key_prefix)
self._upload_code(key_prefix=deploy_key_prefix, repack=self.enable_network_isolation())
deploy_env = dict(self.env)
deploy_env.update(self._framework_env_vars())

if self.model_server_workers:
deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers)
return sagemaker.container_def(deploy_image, self.model_data, deploy_env)
model_data = (
self.repacked_model_data if self.enable_network_isolation() else self.model_data
)
return sagemaker.container_def(deploy_image, model_data, deploy_env)

def serving_image_uri(self, region_name, instance_type):
"""Create a URI for the serving image.
Expand Down
50 changes: 50 additions & 0 deletions tests/data/xgboost_abalone/abalone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import argparse
import os

from sagemaker_xgboost_container.data_utils import get_dmatrix

import xgboost as xgb

# File name of the serialized booster. The training entry point below writes
# it and ``model_fn`` reads it, so both sides must use this same constant.
model_filename = "xgboost-model"

if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    # SageMaker-specific arguments. Defaults are taken from the environment
    # variables, with hard-coded fallback paths when they are not set.
    parser.add_argument(
        "--model_dir", type=str, default=os.environ.get("SM_MODEL_DIR", "/opt/ml/model")
    )
    # NOTE(review): the fallback directory is named "abalone" while the argument
    # and environment variable refer to a "train" channel -- confirm intended.
    parser.add_argument(
        "--train",
        type=str,
        default=os.environ.get("SM_CHANNEL_TRAIN", "/opt/ml/input/data/abalone"),
    )

    # parse_known_args (rather than parse_args) so that any additional
    # command-line arguments are ignored instead of aborting the script.
    args, _ = parser.parse_known_args()

    # presumably "libsvm" is the content type of the training files -- verify
    # against sagemaker_xgboost_container.data_utils.get_dmatrix.
    dtrain = get_dmatrix(args.train, "libsvm")

    params = {
        "max_depth": 5,
        "eta": 0.2,
        "gamma": 4,
        "min_child_weight": 6,
        "subsample": 0.7,
        "verbosity": 2,
        "objective": "reg:squarederror",
        "tree_method": "auto",
        "predictor": "auto",
    }

    booster = xgb.train(params=params, dtrain=dtrain, num_boost_round=50)
    # Build the path with os.path.join, the same way model_fn does when loading,
    # so a trailing separator in --model_dir cannot produce a doubled slash.
    booster.save_model(os.path.join(args.model_dir, model_filename))


def model_fn(model_dir):
    """Load the trained booster from ``model_dir`` and return it.

    The file is looked up under the module-level ``model_filename``, which is
    the same name the training entry point in this file uses when saving.

    Args:
        model_dir: Directory that contains the serialized model file.

    Returns:
        An ``xgb.Booster`` with the saved model loaded into it.
    """
    model_path = os.path.join(model_dir, model_filename)
    loaded = xgb.Booster()
    loaded.load_model(model_path)
    return loaded
34 changes: 34 additions & 0 deletions tests/integ/test_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

import os
import pytest
from sagemaker.utils import unique_name_from_base
from sagemaker.xgboost import XGBoost
from sagemaker.xgboost.processing import XGBoostProcessor
from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
from tests.integ.timeout import timeout
Expand Down Expand Up @@ -48,3 +50,35 @@ def test_framework_processing_job_with_deps(
inputs=[],
wait=True,
)


def test_training_with_network_isolation(
    sagemaker_session,
    xgboost_latest_version,
    xgboost_latest_py_version,
    cpu_instance_type,
):
    """Train with network isolation enabled and check the training job reports it."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        base_job_name = "test-network-isolation-xgboost"
        abalone_dir = os.path.join(DATA_DIR, "xgboost_abalone")

        estimator = XGBoost(
            entry_point=os.path.join(abalone_dir, "abalone.py"),
            role=ROLE,
            instance_type=cpu_instance_type,
            instance_count=1,
            framework_version=xgboost_latest_version,
            py_version=xgboost_latest_py_version,
            base_job_name=base_job_name,
            sagemaker_session=sagemaker_session,
            enable_network_isolation=True,
        )

        # Upload the training data through the estimator's own session.
        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(abalone_dir, "abalone"),
            key_prefix="integ-test-data/xgboost_abalone/abalone",
        )
        job_name = unique_name_from_base(base_job_name)
        estimator.fit(inputs={"train": train_input}, job_name=job_name)

        # The described training job must carry a truthy isolation flag.
        job_description = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=job_name
        )
        assert job_description["EnableNetworkIsolation"]
21 changes: 21 additions & 0 deletions tests/unit/test_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from packaging.version import Version


from sagemaker.fw_utils import UploadedCode
from sagemaker.xgboost import XGBoost, XGBoostModel, XGBoostPredictor


Expand Down Expand Up @@ -180,6 +181,26 @@ def test_create_model(sagemaker_session, xgboost_framework_version):
assert model_values["Image"] == default_image_uri


@patch("sagemaker.model.FrameworkModel._upload_code")
def test_create_model_with_network_isolation(upload, sagemaker_session, xgboost_framework_version):
    """With network isolation on, the container definition points at the repacked model."""
    model_data_uri = "s3://mybucket/source"
    repacked_uri = "s3://mybucket/prefix/model.tar.gz"

    model = XGBoostModel(
        model_data=model_data_uri,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        entry_point=SCRIPT_PATH,
        framework_version=xgboost_framework_version,
        enable_network_isolation=True,
    )
    # _upload_code is mocked out, so the state the test relies on (presumably
    # set by the real upload step -- confirm) is assigned by hand.
    model.uploaded_code = UploadedCode(s3_prefix=repacked_uri, script_name="script")
    model.repacked_model_data = repacked_uri

    container_def = model.prepare_container_def(CPU)

    assert container_def["Environment"]["SAGEMAKER_SUBMIT_DIRECTORY"] == "/opt/ml/model/code"
    assert container_def["ModelDataUrl"] == repacked_uri


@patch("sagemaker.estimator.name_from_base")
def test_create_model_from_estimator(name_from_base, sagemaker_session, xgboost_framework_version):
container_log_level = '"logging.INFO"'
Expand Down