Skip to content

Commit 7bc8eb9

Browse files
deprecation: add deprecation warning for s3_data_distribution_type in Clarify DataConfig
1 parent 2dc0f34 commit 7bc8eb9

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

src/sagemaker/clarify.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,17 @@
2222
import tempfile
2323
from abc import ABC, abstractmethod
2424
from sagemaker import image_uris, s3, utils
25+
from sagemaker.deprecations import deprecation_warning
2526
from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor
2627

2728
logger = logging.getLogger(__name__)
2829

2930

31+
@deprecation_warning(
32+
msg="s3_data_distribution_type parameter will no longer be supported. Everything else will"
33+
" remain as is",
34+
date="15 Mar 2022",
35+
)
3036
class DataConfig:
3137
"""Config object related to configurations of the input and output dataset."""
3238

@@ -58,8 +64,8 @@ def __init__(
5864
dataset format is JSONLines.
5965
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
6066
"application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
61-
s3_data_distribution_type (str): Valid options are "FullyReplicated" or
62-
"ShardedByS3Key".
67+
s3_data_distribution_type (str): Deprecated. Only valid option is "FullyReplicated".
68+
Any other value is ignored.
6369
s3_compression_type (str): Valid options are "None" or "Gzip".
6470
joinsource (str): The name or index of the column in the dataset that acts as an
6571
identifier column (for instance, while performing a join). This column is only
@@ -80,7 +86,13 @@ def __init__(
8086
self.s3_data_input_path = s3_data_input_path
8187
self.s3_output_path = s3_output_path
8288
self.s3_analysis_config_output_path = s3_analysis_config_output_path
83-
self.s3_data_distribution_type = s3_data_distribution_type
89+
if s3_data_distribution_type != "FullyReplicated":
90+
logger.warning(
91+
"s3_data_distribution_type parameter, set to %s, is being ignored. Only"
92+
" valid option is FullyReplicated",
93+
s3_data_distribution_type,
94+
)
95+
self.s3_data_distribution_type = "FullyReplicated"
8496
self.s3_compression_type = s3_compression_type
8597
self.label = label
8698
self.headers = headers

tests/integ/test_clarify.py

+1
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def data_config(sagemaker_session, data_path, headers):
124124
label="Label",
125125
headers=headers,
126126
dataset_type="text/csv",
127+
s3_data_distribution_type="ShardedByS3Key",
127128
)
128129

129130

0 commit comments

Comments
 (0)