
feat: Add tagging support for create ir job #3901


Merged
merged 2 commits on Jun 6, 2023
7 changes: 7 additions & 0 deletions src/sagemaker/session.py
@@ -5313,6 +5313,7 @@ def _create_inference_recommendations_job_request(
framework: str,
sample_payload_url: str,
supported_content_types: List[str],
tags: List[Dict[str, str]],
model_name: str = None,
model_package_version_arn: str = None,
job_duration_in_seconds: int = None,
@@ -5348,6 +5349,8 @@ def _create_inference_recommendations_job_request(
benchmarked by Amazon SageMaker Inference Recommender that matches your model.
supported_instance_types (List[str]): A list of the instance types that are used
to generate inferences in real-time.
tags (List[Dict[str, str]]): Tags used to identify where the Inference Recommendations
call was made from.
endpoint_configurations (List[Dict[str, any]]): Specifies the endpoint configurations
to use for a job. Will be used for `Advanced` jobs.
traffic_pattern (Dict[str, any]): Specifies the traffic pattern for the job.
@@ -5386,6 +5389,7 @@ def _create_inference_recommendations_job_request(
"InputConfig": {
"ContainerConfig": containerConfig,
},
"Tags": tags,
}

request.get("InputConfig").update(
@@ -5477,6 +5481,8 @@ def create_inference_recommendations_job(
job_name = "SMPYTHONSDK-" + str(unique_tail)
job_description = "#python-sdk-create"

tags = [{"Key": "ClientType", "Value": "PythonSDK-RightSize"}]

create_inference_recommendations_job_request = (
self._create_inference_recommendations_job_request(
role=role,
Expand All @@ -5496,6 +5502,7 @@ def create_inference_recommendations_job(
traffic_pattern=traffic_pattern,
stopping_conditions=stopping_conditions,
resource_limit=resource_limit,
tags=tags,
)
)

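With this change, every Inference Recommender job created through right_size() / create_inference_recommendations_job carries the ClientType tag shown above. A minimal verification sketch, not part of this PR (the job name is a placeholder, and it assumes DescribeInferenceRecommendationsJob returns the job ARN under "JobArn"):

import boto3

sm = boto3.client("sagemaker")
# Placeholder job name; names generated by the SDK look like "SMPYTHONSDK-<timestamp>".
job_arn = sm.describe_inference_recommendations_job(JobName="SMPYTHONSDK-1234567890")["JobArn"]
tags = sm.list_tags(ResourceArn=job_arn)["Tags"]
assert {"Key": "ClientType", "Value": "PythonSDK-RightSize"} in tags
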
112 changes: 68 additions & 44 deletions tests/integ/test_inference_recommender.py
@@ -13,9 +13,11 @@
from __future__ import absolute_import

import os
import time

import pytest

from botocore.exceptions import ClientError
from sagemaker import image_uris
from sagemaker.model import Model
from sagemaker.sklearn.model import SKLearnModel, SKLearnPredictor
@@ -40,6 +42,18 @@
IR_SKLEARN_FRAMEWORK_VERSION = "1.0-1"


def retry_and_back_off(right_size_fn):
    # Call the given zero-argument callable and retry with linear back-off
    # when the Inference Recommender API throttles the request.
    tot_retries = 3
    retries = 1
    while True:
        try:
            return right_size_fn()
        except ClientError as e:
            if retries > tot_retries or e.response["Error"]["Code"] != "ThrottlingException":
                raise
            time.sleep(5 * retries)
            retries += 1
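
A minimal usage sketch for the helper above (the callable here is a stand-in; the fixtures below wrap their calls as lambda: <model>.right_size(...) so each retry re-issues the API request):

# Stand-in callable; retries are attempted only for botocore ClientError
# responses whose error code is "ThrottlingException".
result = retry_and_back_off(lambda: {"JobName": "placeholder-job"})
assert result == {"JobName": "placeholder-job"}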


@pytest.fixture(scope="module")
def default_right_sized_model(sagemaker_session, cpu_instance_type):
with timeout(minutes=45):
@@ -68,13 +82,15 @@ def default_right_sized_model(sagemaker_session, cpu_instance_type):
)

return (
sklearn_model_package.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
retry_and_back_off(
lambda: sklearn_model_package.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
)
),
model_package_group_name,
ir_job_name,
@@ -133,17 +149,19 @@ def advanced_right_sized_model(sagemaker_session, cpu_instance_type):
]

return (
sklearn_model_package.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
retry_and_back_off(
lambda: sklearn_model_package.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
)
),
model_package_group_name,
)
@@ -175,13 +193,15 @@ def default_right_sized_unregistered_model(sagemaker_session, cpu_instance_type)
)

return (
sklearn_model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
retry_and_back_off(
lambda: sklearn_model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
)
),
ir_job_name,
)
@@ -224,18 +244,20 @@ def advanced_right_sized_unregistered_model(sagemaker_session, cpu_instance_type
ModelLatencyThreshold(percentile="P95", value_in_milliseconds=100)
]

return sklearn_model.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
log_level="Quiet",
return retry_and_back_off(
lambda: sklearn_model.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
log_level="Quiet",
)
)

except Exception:
@@ -265,13 +287,15 @@ def default_right_sized_unregistered_base_model(sagemaker_session, cpu_instance_
)

return (
model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
retry_and_back_off(
lambda: model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
)
),
ir_job_name,
)
2 changes: 2 additions & 0 deletions tests/unit/test_session.py
@@ -4704,6 +4704,7 @@ def create_inference_recommendations_job_default_happy_response():
"ModelPackageVersionArn": IR_MODEL_PACKAGE_VERSION_ARN,
},
"JobDescription": "#python-sdk-create",
"Tags": [{"Key": "ClientType", "Value": "PythonSDK-RightSize"}],
}


@@ -4728,6 +4729,7 @@ def create_inference_recommendations_job_default_model_name_happy_response():
"ModelName": IR_MODEL_NAME,
},
"JobDescription": "#python-sdk-create",
"Tags": [{"Key": "ClientType", "Value": "PythonSDK-RightSize"}],
}
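
A hedged sketch of how the expected request above can be asserted, not part of this diff (the sagemaker_session fixture, the placeholder ARN/URL values, and the exact keyword names of create_inference_recommendations_job are assumptions):

from unittest.mock import MagicMock

def test_client_type_tag_reaches_the_sagemaker_client(sagemaker_session):
    sagemaker_session.sagemaker_client.create_inference_recommendations_job = MagicMock()
    sagemaker_session.create_inference_recommendations_job(
        role="arn:aws:iam::123456789012:role/ServiceRole",  # placeholder role ARN
        sample_payload_url="s3://my-bucket/payload.tar.gz",  # placeholder payload URL
        supported_content_types=["text/csv"],
        model_package_version_arn=IR_MODEL_PACKAGE_VERSION_ARN,
        framework="SAGEMAKER-SCIKIT-LEARN",
    )
    _, kwargs = sagemaker_session.sagemaker_client.create_inference_recommendations_job.call_args
    assert kwargs["Tags"] == [{"Key": "ClientType", "Value": "PythonSDK-RightSize"}]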

