@@ -28,19 +28,20 @@ class RedshiftDatasetDefinition(ApiObject):
28
28
With this input, SQL queries will be executed using Redshift to generate datasets to S3.
29
29
30
30
Parameters:
31
- cluster_id (str): The Redshift cluster Identifier.
32
- database (str): The name of the Redshift database used in Redshift query execution.
33
- db_user (str): The database user name used in Redshift query execution.
34
- query_string (str): The SQL query statements to be executed.
35
- cluster_role_arn (str): The IAM role attached to your Redshift cluster that
31
+ cluster_id (str, default=None): The Redshift cluster Identifier.
32
+ database (str, default=None):
33
+ The name of the Redshift database used in Redshift query execution.
34
+ db_user (str, default=None): The database user name used in Redshift query execution.
35
+ query_string (str, default=None): The SQL query statements to be executed.
36
+ cluster_role_arn (str, default=None): The IAM role attached to your Redshift cluster that
36
37
Amazon SageMaker uses to generate datasets.
37
- output_s3_uri (str): The location in Amazon S3 where the Redshift query
38
+ output_s3_uri (str, default=None): The location in Amazon S3 where the Redshift query
38
39
results are stored.
39
- kms_key_id (str): The AWS Key Management Service (AWS KMS) key that Amazon
40
+ kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
40
41
SageMaker uses to encrypt data from a Redshift execution.
41
- output_format (str): The data storage format for Redshift query results.
42
+ output_format (str, default=None): The data storage format for Redshift query results.
42
43
Valid options are "PARQUET", "CSV"
43
- output_compression (str): The compression used for Redshift query results.
44
+ output_compression (str, default=None): The compression used for Redshift query results.
44
45
Valid options are "None", "GZIP", "SNAPPY", "ZSTD", "BZIP2"
45
46
"""
46
47
@@ -54,23 +55,50 @@ class RedshiftDatasetDefinition(ApiObject):
54
55
output_format = None
55
56
output_compression = None
56
57
58
+ def __init__ (
59
+ self ,
60
+ cluster_id = None ,
61
+ database = None ,
62
+ db_user = None ,
63
+ query_string = None ,
64
+ cluster_role_arn = None ,
65
+ output_s3_uri = None ,
66
+ kms_key_id = None ,
67
+ output_format = None ,
68
+ output_compression = None ,
69
+ ):
70
+ """Initialize RedshiftDatasetDefinition."""
71
+ super (RedshiftDatasetDefinition , self ).__init__ (
72
+ cluster_id = cluster_id ,
73
+ database = database ,
74
+ db_user = db_user ,
75
+ query_string = query_string ,
76
+ cluster_role_arn = cluster_role_arn ,
77
+ output_s3_uri = output_s3_uri ,
78
+ kms_key_id = kms_key_id ,
79
+ output_format = output_format ,
80
+ output_compression = output_compression ,
81
+ )
82
+
57
83
58
84
class AthenaDatasetDefinition (ApiObject ):
59
85
"""DatasetDefinition for Athena.
60
86
61
87
With this input, SQL queries will be executed using Athena to generate datasets to S3.
62
88
63
89
Parameters:
64
- catalog (str): The name of the data catalog used in Athena query execution.
65
- database (str): The name of the database used in the Athena query execution.
66
- query_string (str): The SQL query statements, to be executed.
67
- output_s3_uri (str): The location in Amazon S3 where Athena query results are stored.
68
- work_group (str): The name of the workgroup in which the Athena query is being started.
69
- kms_key_id (str): The AWS Key Management Service (AWS KMS) key that Amazon
90
+ catalog (str, default=None): The name of the data catalog used in Athena query execution.
91
+ database (str, default=None): The name of the database used in the Athena query execution.
92
+ query_string (str, default=None): The SQL query statements, to be executed.
93
+ output_s3_uri (str, default=None):
94
+ The location in Amazon S3 where Athena query results are stored.
95
+ work_group (str, default=None):
96
+ The name of the workgroup in which the Athena query is being started.
97
+ kms_key_id (str, default=None): The AWS Key Management Service (AWS KMS) key that Amazon
70
98
SageMaker uses to encrypt data generated from an Athena query execution.
71
- output_format (str): The data storage format for Athena query results.
99
+ output_format (str, default=None): The data storage format for Athena query results.
72
100
Valid options are "PARQUET", "ORC", "AVRO", "JSON", "TEXTFILE"
73
- output_compression (str): The compression used for Athena query results.
101
+ output_compression (str, default=None): The compression used for Athena query results.
74
102
Valid options are "GZIP", "SNAPPY", "ZLIB"
75
103
"""
76
104
@@ -83,24 +111,51 @@ class AthenaDatasetDefinition(ApiObject):
83
111
output_format = None
84
112
output_compression = None
85
113
114
+ def __init__ (
115
+ self ,
116
+ catalog = None ,
117
+ database = None ,
118
+ query_string = None ,
119
+ output_s3_uri = None ,
120
+ work_group = None ,
121
+ kms_key_id = None ,
122
+ output_format = None ,
123
+ output_compression = None ,
124
+ ):
125
+ """Initialize AthenaDatasetDefinition."""
126
+ super (AthenaDatasetDefinition , self ).__init__ (
127
+ catalog = catalog ,
128
+ database = database ,
129
+ query_string = query_string ,
130
+ output_s3_uri = output_s3_uri ,
131
+ work_group = work_group ,
132
+ kms_key_id = kms_key_id ,
133
+ output_format = output_format ,
134
+ output_compression = output_compression ,
135
+ )
136
+
86
137
87
138
class DatasetDefinition (ApiObject ):
88
139
"""DatasetDefinition input.
89
140
90
141
Parameters:
91
- data_distribution_type (str): Whether the generated dataset is FullyReplicated or
92
- ShardedByS3Key (default).
93
- input_mode (str): Whether to use File or Pipe input mode. In File (default) mode, Amazon
142
+ data_distribution_type (str, default="ShardedByS3Key"):
143
+ Whether the generated dataset is FullyReplicated or ShardedByS3Key (default).
144
+ input_mode (str, default="File"):
145
+ Whether to use File or Pipe input mode. In File (default) mode, Amazon
94
146
SageMaker copies the data from the input source onto the local Amazon Elastic Block
95
147
Store (Amazon EBS) volumes before starting your training algorithm. This is the most
96
148
commonly used input mode. In Pipe mode, Amazon SageMaker streams input data from the
97
149
source directly to your algorithm without using the EBS volume.
98
- local_path (str): The local path where you want Amazon SageMaker to download the Dataset
150
+ local_path (str, default=None):
151
+ The local path where you want Amazon SageMaker to download the Dataset
99
152
Definition inputs to run a processing job. LocalPath is an absolute path to the input
100
153
data. This is a required parameter when `AppManaged` is False (default).
101
- redshift_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`):
154
+ redshift_dataset_definition
155
+ (:class:`~sagemaker.dataset_definition.inputs.RedshiftDatasetDefinition`, default=None):
102
156
Configuration for Redshift Dataset Definition input.
103
- athena_dataset_definition (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`):
157
+ athena_dataset_definition
158
+ (:class:`~sagemaker.dataset_definition.inputs.AthenaDatasetDefinition`, default=None):
104
159
Configuration for Athena Dataset Definition input.
105
160
"""
106
161
@@ -115,6 +170,23 @@ class DatasetDefinition(ApiObject):
115
170
redshift_dataset_definition = None
116
171
athena_dataset_definition = None
117
172
173
+ def __init__ (
174
+ self ,
175
+ data_distribution_type = "ShardedByS3Key" ,
176
+ input_mode = "File" ,
177
+ local_path = None ,
178
+ redshift_dataset_definition = None ,
179
+ athena_dataset_definition = None ,
180
+ ):
181
+ """Initialize DatasetDefinition."""
182
+ super (DatasetDefinition , self ).__init__ (
183
+ data_distribution_type = data_distribution_type ,
184
+ input_mode = input_mode ,
185
+ local_path = local_path ,
186
+ redshift_dataset_definition = redshift_dataset_definition ,
187
+ athena_dataset_definition = athena_dataset_definition ,
188
+ )
189
+
118
190
119
191
class S3Input (ApiObject ):
120
192
"""Metadata of data objects stored in S3.
@@ -126,13 +198,15 @@ class S3Input(ApiObject):
126
198
Use ManifestFile if strong consistency is required.
127
199
128
200
Parameters:
129
- s3_uri (str): the path to a specific S3 object or a S3 prefix
130
- local_path (str): the path to a local directory. If not provided, skips data download
201
+ s3_uri (str, default=None): the path to a specific S3 object or a S3 prefix
202
+ local_path (str, default=None):
203
+ the path to a local directory. If not provided, skips data download
131
204
by SageMaker platform.
132
- s3_data_type (str): Valid options are "ManifestFile" or "S3Prefix".
133
- s3_input_mode (str): Valid options are "Pipe" or "File".
134
- s3_data_distribution_type (str): Valid options are "FullyReplicated" or "ShardedByS3Key".
135
- s3_compression_type (str): Valid options are "None" or "Gzip".
205
+ s3_data_type (str, default="S3Prefix"): Valid options are "ManifestFile" or "S3Prefix".
206
+ s3_input_mode (str, default="File"): Valid options are "Pipe" or "File".
207
+ s3_data_distribution_type (str, default="FullyReplicated"):
208
+ Valid options are "FullyReplicated" or "ShardedByS3Key".
209
+ s3_compression_type (str, default=None): Valid options are "None" or "Gzip".
136
210
"""
137
211
138
212
s3_uri = None
@@ -141,3 +215,22 @@ class S3Input(ApiObject):
141
215
s3_input_mode = "File"
142
216
s3_data_distribution_type = "FullyReplicated"
143
217
s3_compression_type = None
218
+
219
+ def __init__ (
220
+ self ,
221
+ s3_uri = None ,
222
+ local_path = None ,
223
+ s3_data_type = "S3Prefix" ,
224
+ s3_input_mode = "File" ,
225
+ s3_data_distribution_type = "FullyReplicated" ,
226
+ s3_compression_type = None ,
227
+ ):
228
+ """Initialize S3Input."""
229
+ super (S3Input , self ).__init__ (
230
+ s3_uri = s3_uri ,
231
+ local_path = local_path ,
232
+ s3_data_type = s3_data_type ,
233
+ s3_input_mode = s3_input_mode ,
234
+ s3_data_distribution_type = s3_data_distribution_type ,
235
+ s3_compression_type = s3_compression_type ,
236
+ )
0 commit comments