Skip to content

Commit 2852236

Browse files
authored
Revert "Add deprecation warning for s3_data_distribution_type in Clarify Data Config (#2847)"
This reverts commit d9e2567.
1 parent d9e2567 commit 2852236

File tree

2 files changed

+3
-28
lines changed

2 files changed

+3
-28
lines changed

src/sagemaker/clarify.py

+3 -15
Original file line number | Diff line number | Diff line change
@@ -22,17 +22,11 @@
2222
import tempfile
2323
from abc import ABC, abstractmethod
2424
from sagemaker import image_uris, s3, utils
25-
from sagemaker.deprecations import deprecation_warning
2625
from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor
2726

2827
logger = logging.getLogger(__name__)
2928

3029

31-
@deprecation_warning(
32-
msg="s3_data_distribution_type parameter will no longer be supported. Everything else will"
33-
" remain as is",
34-
date="15 Mar 2022",
35-
)
3630
class DataConfig:
3731
"""Config object related to configurations of the input and output dataset."""
3832

@@ -64,8 +58,8 @@ def __init__(
6458
dataset format is JSONLines.
6559
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
6660
"application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
67-
s3_data_distribution_type (str): Deprecated. Only valid option is "FullyReplicated".
68-
Any other value is ignored.
61+
s3_data_distribution_type (str): Valid options are "FullyReplicated" or
62+
"ShardedByS3Key".
6963
s3_compression_type (str): Valid options are "None" or "Gzip".
7064
joinsource (str): The name or index of the column in the dataset that acts as an
7165
identifier column (for instance, while performing a join). This column is only
@@ -86,13 +80,7 @@ def __init__(
8680
self.s3_data_input_path = s3_data_input_path
8781
self.s3_output_path = s3_output_path
8882
self.s3_analysis_config_output_path = s3_analysis_config_output_path
89-
if s3_data_distribution_type != "FullyReplicated":
90-
logger.warning(
91-
"s3_data_distribution_type parameter, set to %s, is being ignored. Only"
92-
" valid option is FullyReplicated",
93-
s3_data_distribution_type,
94-
)
95-
self.s3_data_distribution_type = "FullyReplicated"
83+
self.s3_data_distribution_type = s3_data_distribution_type
9684
self.s3_compression_type = s3_compression_type
9785
self.label = label
9886
self.headers = headers

tests/unit/test_clarify.py

-13
Original file line number | Diff line number | Diff line change
@@ -82,19 +82,6 @@ def test_invalid_data_config():
8282
)
8383

8484

85-
def test_s3_data_distribution_type_ignorance():
86-
data_config = DataConfig(
87-
s3_data_input_path="s3://input/train.csv",
88-
s3_output_path="s3://output/analysis_test_result",
89-
label="Label",
90-
headers=["Label", "F1", "F2", "F3", "F4"],
91-
dataset_type="text/csv",
92-
joinsource="F4",
93-
s3_data_distribution_type="ShardedByS3Key",
94-
)
95-
assert data_config.s3_data_distribution_type == "FullyReplicated"
96-
97-
9885
def test_bias_config():
9986
label_values = [1]
10087
facet_name = "F1"

0 commit comments

Comments
 (0)