22
22
import tempfile
23
23
from abc import ABC , abstractmethod
24
24
from sagemaker import image_uris , s3 , utils
25
+ from sagemaker .deprecations import deprecation_warning
25
26
from sagemaker .processing import ProcessingInput , ProcessingOutput , Processor
26
27
27
28
# Module-level logger, named after this module via __name__.
logger = logging .getLogger (__name__ )
28
29
29
30
31
+ @deprecation_warning (
32
+ msg = "s3_data_distribution_type parameter will no longer be supported. Everything else will"
33
+ " remain as is" ,
34
+ date = "15 Mar 2022" ,
35
+ )
30
36
class DataConfig :
31
37
"""Config object related to configurations of the input and output dataset."""
32
38
@@ -58,8 +64,8 @@ def __init__(
58
64
dataset format is JSONLines.
59
65
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
60
66
"application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
61
- s3_data_distribution_type (str): Valid options are "FullyReplicated" or
62
- "ShardedByS3Key" .
67
+ s3_data_distribution_type (str): Deprecated. Only valid option is "FullyReplicated".
68
+ Any other value is ignored .
63
69
s3_compression_type (str): Valid options are "None" or "Gzip".
64
70
joinsource (str): The name or index of the column in the dataset that acts as an
65
71
identifier column (for instance, while performing a join). This column is only
@@ -80,7 +86,13 @@ def __init__(
80
86
self .s3_data_input_path = s3_data_input_path
81
87
self .s3_output_path = s3_output_path
82
88
self .s3_analysis_config_output_path = s3_analysis_config_output_path
83
- self .s3_data_distribution_type = s3_data_distribution_type
89
+ if s3_data_distribution_type != "FullyReplicated" :
90
+ logger .warning (
91
+ "s3_data_distribution_type parameter, set to %s, is being ignored. Only"
92
+ " valid option is FullyReplicated" ,
93
+ s3_data_distribution_type ,
94
+ )
95
+ self .s3_data_distribution_type = "FullyReplicated"
84
96
self .s3_compression_type = s3_compression_type
85
97
self .label = label
86
98
self .headers = headers
0 commit comments