Skip to content

deprecate: Remove deprecated argument s3_data_distribution_type #3064

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 2 commits on May 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions src/sagemaker/clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,11 @@
import tempfile
from abc import ABC, abstractmethod
from sagemaker import image_uris, s3, utils
from sagemaker.deprecations import deprecation_warning
from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor

logger = logging.getLogger(__name__)


@deprecation_warning(
msg="s3_data_distribution_type parameter will no longer be supported. Everything else will"
" remain as is",
date="15 Mar 2022",
)
class DataConfig:
"""Config object related to configurations of the input and output dataset."""

Expand All @@ -45,7 +39,6 @@ def __init__(
headers=None,
features=None,
dataset_type="text/csv",
s3_data_distribution_type="FullyReplicated",
s3_compression_type="None",
joinsource=None,
):
Expand All @@ -64,8 +57,6 @@ def __init__(
dataset format is JSONLines.
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
"application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
s3_data_distribution_type (str): Deprecated. Only valid option is "FullyReplicated".
Any other value is ignored.
s3_compression_type (str): Valid options are "None" or "Gzip".
joinsource (str): The name or index of the column in the dataset that acts as an
identifier column (for instance, while performing a join). This column is only
Expand All @@ -86,12 +77,6 @@ def __init__(
self.s3_data_input_path = s3_data_input_path
self.s3_output_path = s3_output_path
self.s3_analysis_config_output_path = s3_analysis_config_output_path
if s3_data_distribution_type != "FullyReplicated":
logger.warning(
"s3_data_distribution_type parameter, set to %s, is being ignored. Only"
" valid option is FullyReplicated",
s3_data_distribution_type,
)
self.s3_data_distribution_type = "FullyReplicated"
self.s3_compression_type = s3_compression_type
self.label = label
Expand Down
1 change: 0 additions & 1 deletion tests/unit/test_clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ def test_s3_data_distribution_type_ignorance():
headers=["Label", "F1", "F2", "F3", "F4"],
dataset_type="text/csv",
joinsource="F4",
s3_data_distribution_type="ShardedByS3Key",
)
assert data_config.s3_data_distribution_type == "FullyReplicated"

Expand Down