
change: Add PipelineVariable annotation for all processor subclasses #3328


Merged
merged 1 commit on Sep 2, 2022
230 changes: 117 additions & 113 deletions src/sagemaker/clarify.py
@@ -25,9 +25,11 @@

import tempfile
from abc import ABC, abstractmethod
from typing import List, Union, Dict
Contributor Author

Currently we don't support integrating the Clarify processor directly with Pipelines, so no PipelineVariable annotations are added in clarify.py (see the annotation sketch at the end of this hunk for the pattern used in the other processor subclasses).

from typing import List, Union, Dict, Optional, Any

from sagemaker import image_uris, s3, utils
from sagemaker.session import Session
from sagemaker.network import NetworkConfig
from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor

logger = logging.getLogger(__name__)
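For comparison, the annotation pattern this PR applies to the other processor subclasses wraps each constructor argument that can come from a pipeline parameter in Union[..., PipelineVariable]. A minimal sketch of that pattern follows; the MyProcessor class is hypothetical and not part of this PR, and it assumes the PipelineVariable entity from sagemaker.workflow.entities:

from typing import Optional, Union

from sagemaker.processing import Processor
from sagemaker.workflow.entities import PipelineVariable


class MyProcessor(Processor):
    """Hypothetical subclass showing the PipelineVariable-style annotations."""

    def __init__(
        self,
        role: Union[str, PipelineVariable],
        image_uri: Union[str, PipelineVariable],
        instance_count: Union[int, PipelineVariable],
        instance_type: Union[str, PipelineVariable],
        volume_size_in_gb: Union[int, PipelineVariable] = 30,
        output_kms_key: Optional[Union[str, PipelineVariable]] = None,
    ):
        # Pipeline parameters are resolved at execution time; the base class
        # accepts them wherever a plain str/int is accepted.
        super().__init__(
            role=role,
            image_uri=image_uri,
            instance_count=instance_count,
            instance_type=instance_type,
            volume_size_in_gb=volume_size_in_gb,
            output_kms_key=output_kms_key,
        )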
@@ -38,21 +40,21 @@ class DataConfig:

def __init__(
self,
s3_data_input_path,
s3_output_path,
s3_analysis_config_output_path=None,
label=None,
headers=None,
features=None,
dataset_type="text/csv",
s3_compression_type="None",
joinsource=None,
facet_dataset_uri=None,
facet_headers=None,
predicted_label_dataset_uri=None,
predicted_label_headers=None,
predicted_label=None,
excluded_columns=None,
s3_data_input_path: str,
s3_output_path: str,
s3_analysis_config_output_path: Optional[str] = None,
label: Optional[str] = None,
headers: Optional[List[str]] = None,
features: Optional[List[str]] = None,
dataset_type: str = "text/csv",
s3_compression_type: str = "None",
joinsource: Optional[Union[str, int]] = None,
facet_dataset_uri: Optional[str] = None,
facet_headers: Optional[List[str]] = None,
predicted_label_dataset_uri: Optional[str] = None,
predicted_label_headers: Optional[List[str]] = None,
predicted_label: Optional[Union[str, int]] = None,
excluded_columns: Optional[Union[List[int], List[str]]] = None,
):
"""Initializes a configuration of both input and output datasets.

@@ -65,7 +67,7 @@ def __init__(
label (str): Target attribute of the model required by bias metrics.
Specified as column name or index for CSV dataset or as JSONPath for JSONLines.
*Required parameter* except for when the input dataset does not contain the label.
features (str): JSONPath for locating the feature columns for bias metrics if the
features (List[str]): JSONPath for locating the feature columns for bias metrics if the
dataset format is JSONLines.
dataset_type (str): Format of the dataset. Valid values are ``"text/csv"`` for CSV,
``"application/jsonlines"`` for JSONLines, and
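A minimal usage sketch against the newly typed DataConfig signature; the bucket, file names, label, and headers are placeholders:

from sagemaker.clarify import DataConfig

data_config = DataConfig(
    s3_data_input_path="s3://my-bucket/clarify/train.csv",  # placeholder input URI
    s3_output_path="s3://my-bucket/clarify/output",         # placeholder output URI
    label="target",                                         # placeholder label column
    headers=["target", "age", "income", "gender"],          # placeholder header list
    dataset_type="text/csv",
)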
@@ -191,10 +193,10 @@ class BiasConfig:

def __init__(
self,
label_values_or_threshold,
facet_name,
facet_values_or_threshold=None,
group_name=None,
label_values_or_threshold: Union[int, float, str],
facet_name: Union[str, int, List[str], List[int]],
facet_values_or_threshold: Optional[Union[int, float, str]] = None,
group_name: Optional[str] = None,
):
"""Initializes a configuration of the sensitive groups in the dataset.

@@ -275,17 +277,17 @@ class ModelConfig:

def __init__(
self,
model_name: str = None,
instance_count: int = None,
instance_type: str = None,
accept_type: str = None,
content_type: str = None,
content_template: str = None,
custom_attributes: str = None,
accelerator_type: str = None,
endpoint_name_prefix: str = None,
target_model: str = None,
endpoint_name: str = None,
model_name: Optional[str] = None,
instance_count: Optional[int] = None,
instance_type: Optional[str] = None,
accept_type: Optional[str] = None,
content_type: Optional[str] = None,
content_template: Optional[str] = None,
custom_attributes: Optional[str] = None,
accelerator_type: Optional[str] = None,
endpoint_name_prefix: Optional[str] = None,
target_model: Optional[str] = None,
endpoint_name: Optional[str] = None,
):
r"""Initializes a configuration of a model and the endpoint to be created for it.

@@ -414,10 +416,10 @@ class ModelPredictedLabelConfig:

def __init__(
self,
label=None,
probability=None,
probability_threshold=None,
label_headers=None,
label: Optional[Union[str, int]] = None,
probability: Optional[Union[str, int]] = None,
probability_threshold: Optional[float] = None,
label_headers: Optional[List[str]] = None,
):
"""Initializes a model output config to extract the predicted label or predicted score(s).

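The BiasConfig, ModelConfig, and ModelPredictedLabelConfig hunks above can be combined as in the following sketch; the model name, instance settings, and facet values are placeholders, and the list-valued label/facet arguments follow the documented usage in the docstrings:

from sagemaker.clarify import BiasConfig, ModelConfig, ModelPredictedLabelConfig

bias_config = BiasConfig(
    label_values_or_threshold=[1],    # placeholder favorable label value(s)
    facet_name="gender",              # placeholder sensitive attribute
    facet_values_or_threshold=["F"],  # placeholder facet value(s)
)

model_config = ModelConfig(
    model_name="my-model",            # placeholder model name
    instance_count=1,
    instance_type="ml.m5.xlarge",
    accept_type="text/csv",
)

predictions_config = ModelPredictedLabelConfig(probability_threshold=0.8)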
@@ -509,7 +511,9 @@ class PDPConfig(ExplainabilityConfig):
and the corresponding values are included in the analysis output.
""" # noqa E501

def __init__(self, features=None, grid_resolution=15, top_k_features=10):
def __init__(
self, features: Optional[List] = None, grid_resolution: int = 15, top_k_features: int = 10
):
"""Initializes PDP config.

Args:
@@ -680,8 +684,8 @@ class TextConfig:

def __init__(
self,
granularity,
language,
granularity: str,
language: str,
):
"""Initializes a text configuration.

@@ -736,13 +740,13 @@ class ImageConfig:

def __init__(
self,
model_type,
num_segments=None,
feature_extraction_method=None,
segment_compactness=None,
max_objects=None,
iou_threshold=None,
context=None,
model_type: str,
num_segments: Optional[int] = None,
feature_extraction_method: Optional[str] = None,
segment_compactness: Optional[float] = None,
max_objects: Optional[int] = None,
iou_threshold: Optional[float] = None,
context: Optional[float] = None,
):
"""Initializes a config object for Computer Vision (CV) Image explainability.

@@ -817,15 +821,15 @@ class SHAPConfig(ExplainabilityConfig):

def __init__(
self,
baseline=None,
num_samples=None,
agg_method=None,
use_logit=False,
save_local_shap_values=True,
seed=None,
num_clusters=None,
text_config=None,
image_config=None,
baseline: Optional[Union[str, List]] = None,
num_samples: Optional[int] = None,
agg_method: Optional[str] = None,
use_logit: bool = False,
save_local_shap_values: bool = True,
seed: Optional[int] = None,
num_clusters: Optional[int] = None,
text_config: Optional[TextConfig] = None,
image_config: Optional[ImageConfig] = None,
):
"""Initializes config for SHAP analysis.

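A minimal SHAPConfig sketch with a placeholder baseline record and sample count; the text_config and image_config parameters accept the TextConfig and ImageConfig objects typed above when text or image features are explained:

from sagemaker.clarify import SHAPConfig

shap_config = SHAPConfig(
    baseline=[[0.5, 30, 1]],  # placeholder baseline record(s)
    num_samples=100,          # placeholder number of SHAP samples
    agg_method="mean_abs",
    save_local_shap_values=True,
)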
@@ -909,19 +913,19 @@ class SageMakerClarifyProcessor(Processor):

def __init__(
self,
role,
instance_count,
instance_type,
volume_size_in_gb=30,
volume_kms_key=None,
output_kms_key=None,
max_runtime_in_seconds=None,
sagemaker_session=None,
env=None,
tags=None,
network_config=None,
job_name_prefix=None,
version=None,
role: str,
instance_count: int,
instance_type: str,
volume_size_in_gb: int = 30,
volume_kms_key: Optional[str] = None,
output_kms_key: Optional[str] = None,
max_runtime_in_seconds: Optional[int] = None,
sagemaker_session: Optional[Session] = None,
env: Optional[Dict[str, str]] = None,
tags: Optional[List[Dict[str, str]]] = None,
network_config: Optional[NetworkConfig] = None,
job_name_prefix: Optional[str] = None,
version: Optional[str] = None,
):
"""Initializes a SageMakerClarifyProcessor to compute bias metrics and model explanations.

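Constructing the processor with the newly typed arguments; the role ARN is a placeholder and default session/region configuration is assumed:

from sagemaker import Session
from sagemaker.clarify import SageMakerClarifyProcessor

clarify_processor = SageMakerClarifyProcessor(
    role="arn:aws:iam::111122223333:role/MyClarifyRole",  # placeholder execution role
    instance_count=1,
    instance_type="ml.m5.xlarge",
    sagemaker_session=Session(),
)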
@@ -993,13 +997,13 @@ def run(self, **_):

def _run(
self,
data_config,
analysis_config,
wait,
logs,
job_name,
kms_key,
experiment_config,
data_config: DataConfig,
analysis_config: Dict[str, Any],
wait: bool,
logs: bool,
job_name: str,
kms_key: str,
experiment_config: Dict[str, str],
):
"""Runs a :class:`~sagemaker.processing.ProcessingJob` with the SageMaker Clarify container

@@ -1077,14 +1081,14 @@ def _run(

def run_pre_training_bias(
self,
data_config,
data_bias_config,
methods="all",
wait=True,
logs=True,
job_name=None,
kms_key=None,
experiment_config=None,
data_config: DataConfig,
data_bias_config: BiasConfig,
methods: Union[str, List[str]] = "all",
wait: bool = True,
logs: bool = True,
job_name: Optional[str] = None,
kms_key: Optional[str] = None,
experiment_config: Optional[Dict[str, str]] = None,
):
"""Runs a :class:`~sagemaker.processing.ProcessingJob` to compute pre-training bias methods

@@ -1146,16 +1150,16 @@ def run_pre_training_bias(

def run_post_training_bias(
self,
data_config,
data_bias_config,
model_config,
model_predicted_label_config,
methods="all",
wait=True,
logs=True,
job_name=None,
kms_key=None,
experiment_config=None,
data_config: DataConfig,
data_bias_config: BiasConfig,
model_config: ModelConfig,
model_predicted_label_config: ModelPredictedLabelConfig,
methods: Union[str, List[str]] = "all",
wait: bool = True,
logs: bool = True,
job_name: Optional[str] = None,
kms_key: Optional[str] = None,
experiment_config: Optional[Dict[str, str]] = None,
):
"""Runs a :class:`~sagemaker.processing.ProcessingJob` to compute posttraining bias

@@ -1231,17 +1235,17 @@ def run_post_training_bias(

def run_bias(
self,
data_config,
bias_config,
model_config,
model_predicted_label_config=None,
pre_training_methods="all",
post_training_methods="all",
wait=True,
logs=True,
job_name=None,
kms_key=None,
experiment_config=None,
data_config: DataConfig,
bias_config: BiasConfig,
model_config: ModelConfig,
model_predicted_label_config: Optional[ModelPredictedLabelConfig] = None,
pre_training_methods: Union[str, List[str]] = "all",
post_training_methods: Union[str, List[str]] = "all",
wait: bool = True,
logs: bool = True,
job_name: Optional[str] = None,
kms_key: Optional[str] = None,
experiment_config: Optional[Dict[str, str]] = None,
):
"""Runs a :class:`~sagemaker.processing.ProcessingJob` to compute the requested bias methods

@@ -1325,15 +1329,15 @@ def run_bias(

def run_explainability(
self,
data_config,
model_config,
explainability_config,
model_scores=None,
wait=True,
logs=True,
job_name=None,
kms_key=None,
experiment_config=None,
data_config: DataConfig,
model_config: ModelConfig,
explainability_config: Union[ExplainabilityConfig, List],
model_scores: Optional[Union[int, str, ModelPredictedLabelConfig]] = None,
wait: bool = True,
logs: bool = True,
job_name: Optional[str] = None,
kms_key: Optional[str] = None,
experiment_config: Optional[Dict[str, str]] = None,
):
"""Runs a :class:`~sagemaker.processing.ProcessingJob` computing feature attributions.

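Tying the run_* methods together, a hedged end-to-end sketch that reuses the objects defined in the earlier sketches (data_config, bias_config, model_config, predictions_config, shap_config, clarify_processor); explainability_config also accepts a list such as [shap_config, PDPConfig()]:

# Pre- and post-training bias metrics in one processing job.
clarify_processor.run_bias(
    data_config=data_config,
    bias_config=bias_config,
    model_config=model_config,
    model_predicted_label_config=predictions_config,
    pre_training_methods="all",
    post_training_methods="all",
)

# Feature attributions with the SHAP configuration.
clarify_processor.run_explainability(
    data_config=data_config,
    model_config=model_config,
    explainability_config=shap_config,
)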
2 changes: 1 addition & 1 deletion src/sagemaker/fw_utils.py
@@ -575,7 +575,7 @@ def validate_smdistributed(
if "smdistributed" not in distribution:
# Distribution strategy other than smdistributed is selected
return
if is_pipeline_variable(instance_type):
if is_pipeline_variable(instance_type) or is_pipeline_variable(image_uri):
# The instance_type is not available in compile time.
# Rather, it's given in Pipeline execution time
return
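For context on why the extra guard is needed: when image_uri (or instance_type) is a pipeline parameter, its value is only known at pipeline execution time, so compile-time validation of smdistributed support has to be skipped. A short sketch, assuming the usual ParameterString parameter class and the is_pipeline_variable helper already imported by fw_utils:

from sagemaker.workflow import is_pipeline_variable
from sagemaker.workflow.parameters import ParameterString

image_uri = ParameterString(name="TrainingImage")  # resolved only at execution time
instance_type = "ml.p3.16xlarge"                   # known at compile time

print(is_pipeline_variable(image_uri))      # True  -> smdistributed validation deferred
print(is_pipeline_variable(instance_type))  # False -> validated at compile time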