22
22
import tempfile
23
23
from abc import ABC , abstractmethod
24
24
from sagemaker import image_uris , s3 , utils
25
- from sagemaker .deprecations import deprecation_warning
26
25
from sagemaker .processing import ProcessingInput , ProcessingOutput , Processor
27
26
28
27
# Module-level logger, named after this module via __name__.
logger = logging .getLogger (__name__ )
29
28
30
29
31
- @deprecation_warning (
32
- msg = "s3_data_distribution_type parameter will no longer be supported. Everything else will"
33
- " remain as is" ,
34
- date = "15 Mar 2022" ,
35
- )
36
30
class DataConfig :
37
31
"""Config object related to configurations of the input and output dataset."""
38
32
@@ -64,8 +58,8 @@ def __init__(
64
58
dataset format is JSONLines.
65
59
dataset_type (str): Format of the dataset. Valid values are "text/csv" for CSV,
66
60
"application/jsonlines" for JSONLines, and "application/x-parquet" for Parquet.
67
- s3_data_distribution_type (str): Deprecated. Only valid option is "FullyReplicated".
68
- Any other value is ignored .
61
+ s3_data_distribution_type (str): Valid options are "FullyReplicated" or
62
+ "ShardedByS3Key" .
69
63
s3_compression_type (str): Valid options are "None" or "Gzip".
70
64
joinsource (str): The name or index of the column in the dataset that acts as an
71
65
identifier column (for instance, while performing a join). This column is only
@@ -86,13 +80,7 @@ def __init__(
86
80
self .s3_data_input_path = s3_data_input_path
87
81
self .s3_output_path = s3_output_path
88
82
self .s3_analysis_config_output_path = s3_analysis_config_output_path
89
- if s3_data_distribution_type != "FullyReplicated" :
90
- logger .warning (
91
- "s3_data_distribution_type parameter, set to %s, is being ignored. Only"
92
- " valid option is FullyReplicated" ,
93
- s3_data_distribution_type ,
94
- )
95
- self .s3_data_distribution_type = "FullyReplicated"
83
+ self .s3_data_distribution_type = s3_data_distribution_type
96
84
self .s3_compression_type = s3_compression_type
97
85
self .label = label
98
86
self .headers = headers
0 commit comments