aws · shreyapandit · May 3, 2022 · Mar 22, 2022 · May 3, 2022
@@ -22,17 +22,11 @@
 import tempfile
 from abc import ABC, abstractmethod
 from sagemaker import image_uris, s3, utils
-from sagemaker.deprecations import deprecation_warning
 from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor
 
 logger = logging.getLogger(__name__)
 
 
-@deprecation_warning(
-    msg="s3_data_distribution_type parameter will no longer be supported. Everything else will"
-    " remain as is",
-    date="15 Mar 2022",
-)
 class DataConfig:
     """Config object related to configurations of the input and output dataset."""
 
@@ -45,7 +39,6 @@ def __init__(
         headers=None,
         features=None,
         dataset_type="text/csv",
-        s3_data_distribution_type="FullyReplicated",
         s3_compression_type="None",
         joinsource=None,
     ):
@@ -64,8 +57,6 @@ def __init__(
                 dataset format is JSONLines.
             dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
                 "application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
-            s3_data_distribution_type (str): Deprecated. Only valid option is "FullyReplicated".
-                Any other value is ignored.
             s3_compression_type (str): Valid options are "None" or "Gzip".
             joinsource (str): The name or index of the column in the dataset that acts as an
                 identifier column (for instance, while performing a join). This column is only
@@ -86,12 +77,6 @@ def __init__(
         self.s3_data_input_path = s3_data_input_path
         self.s3_output_path = s3_output_path
         self.s3_analysis_config_output_path = s3_analysis_config_output_path
-        if s3_data_distribution_type != "FullyReplicated":
-            logger.warning(
-                "s3_data_distribution_type parameter, set to %s, is being ignored. Only"
-                " valid option is FullyReplicated",
-                s3_data_distribution_type,
-            )
         self.s3_data_distribution_type = "FullyReplicated"
         self.s3_compression_type = s3_compression_type
         self.label = label

@@ -90,7 +90,6 @@ def test_s3_data_distribution_type_ignorance():
         headers=["Label", "F1", "F2", "F3", "F4"],
         dataset_type="text/csv",
         joinsource="F4",
-        s3_data_distribution_type="ShardedByS3Key",
     )
     assert data_config.s3_data_distribution_type == "FullyReplicated"