22
22
import botocore
23
23
from botocore .exceptions import ClientError
24
24
25
+ from sagemaker import s3
25
26
from sagemaker ._studio import _append_project_tags
26
27
from sagemaker .session import Session
27
28
from sagemaker .workflow .callback_step import CallbackOutput , CallbackStep
34
35
from sagemaker .workflow .execution_variables import ExecutionVariables
35
36
from sagemaker .workflow .parameters import Parameter
36
37
from sagemaker .workflow .pipeline_experiment_config import PipelineExperimentConfig
38
+ from sagemaker .workflow .parallelism_config import ParallelismConfiguration
37
39
from sagemaker .workflow .properties import Properties
38
40
from sagemaker .workflow .steps import Step
39
41
from sagemaker .workflow .step_collections import StepCollection
@@ -94,6 +96,7 @@ def create(
94
96
role_arn : str ,
95
97
description : str = None ,
96
98
tags : List [Dict [str , str ]] = None ,
99
+ parallelism_config : ParallelismConfiguration = None ,
97
100
) -> Dict [str , Any ]:
98
101
"""Creates a Pipeline in the Pipelines service.
99
102
@@ -102,37 +105,62 @@ def create(
102
105
description (str): A description of the pipeline.
103
106
tags (List[Dict[str, str]]): A list of {"Key": "string", "Value": "string"} dicts as
104
107
tags.
108
+ parallelism_config (Optional[ParallelismConfiguration]): Parallelism configuration
109
+ that is applied to each of the executions of the pipeline. It takes precedence
110
+ over the parallelism configuration of the parent pipeline.
105
111
106
112
Returns:
107
113
A response dict from the service.
108
114
"""
109
115
tags = _append_project_tags (tags )
110
-
111
- kwargs = self ._create_args (role_arn , description )
116
+ kwargs = self ._create_args (role_arn , description , parallelism_config )
112
117
update_args (
113
118
kwargs ,
114
119
Tags = tags ,
115
120
)
116
121
return self .sagemaker_session .sagemaker_client .create_pipeline (** kwargs )
117
122
118
def _create_args(
    self, role_arn: str, description: str, parallelism_config: ParallelismConfiguration
):
    """Builds the request keyword arguments for a pipeline create/update call.

    The pipeline definition is sent inline when its UTF-8 encoding is smaller
    than 100 KiB (1024 * 100 bytes). Otherwise it is uploaded to the session's
    default bucket under the pipeline name, and the request refers to that
    object through ``PipelineDefinitionS3Location`` instead.

    Args:
        role_arn (str): The role arn that is assumed by pipelines to create step artifacts.
        description (str): A description of the pipeline.
        parallelism_config (Optional[ParallelismConfiguration]): Parallelism configuration
            that is applied to each of the executions of the pipeline. It takes precedence
            over the parallelism configuration of the parent pipeline.

    Returns:
        A keyword argument dict for calling create_pipeline.
    """
    definition_body = self.definition()
    request = {
        "PipelineName": self.name,
        "RoleArn": role_arn,
    }

    inline_limit_bytes = 1024 * 100
    definition_size = len(definition_body.encode("utf-8"))

    if definition_size >= inline_limit_bytes:
        # Too large to send inline: stage the definition in S3 and point
        # the request at the uploaded object.
        upload_uri = s3.s3_path_join(
            "s3://", self.sagemaker_session.default_bucket(), self.name
        )
        s3.S3Uploader.upload_string_as_file_body(
            body=definition_body,
            desired_s3_uri=upload_uri,
            sagemaker_session=self.sagemaker_session,
        )
        request["PipelineDefinitionS3Location"] = {
            "Bucket": self.sagemaker_session.default_bucket(),
            "ObjectKey": self.name,
        }
    else:
        request["PipelineDefinition"] = definition_body

    # Optional fields are merged in through the shared update_args helper.
    update_args(
        request,
        PipelineDescription=description,
        ParallelismConfiguration=parallelism_config,
    )
    return request
138
166
@@ -146,24 +174,33 @@ def describe(self) -> Dict[str, Any]:
146
174
"""
147
175
return self .sagemaker_session .sagemaker_client .describe_pipeline (PipelineName = self .name )
148
176
149
def update(
    self,
    role_arn: str,
    description: str = None,
    parallelism_config: ParallelismConfiguration = None,
) -> Dict[str, Any]:
    """Updates a Pipeline in the Workflow service.

    The request is assembled by ``_create_args`` and sent with the
    ``update_pipeline`` call of the session's SageMaker client.

    Args:
        role_arn (str): The role arn that is assumed by pipelines to create step artifacts.
        description (str): A description of the pipeline.
        parallelism_config (Optional[ParallelismConfiguration]): Parallelism configuration
            that is applied to each of the executions of the pipeline. It takes precedence
            over the parallelism configuration of the parent pipeline.

    Returns:
        A response dict from the service.
    """
    request = self._create_args(role_arn, description, parallelism_config)
    client = self.sagemaker_session.sagemaker_client
    return client.update_pipeline(**request)
161
197
162
198
def upsert (
163
199
self ,
164
200
role_arn : str ,
165
201
description : str = None ,
166
202
tags : List [Dict [str , str ]] = None ,
203
+ parallelism_config : ParallelismConfiguration = None ,
167
204
) -> Dict [str , Any ]:
168
205
"""Creates a pipeline or updates it, if it already exists.
169
206
@@ -172,12 +209,14 @@ def upsert(
172
209
description (str): A description of the pipeline.
173
210
tags (List[Dict[str, str]]): A list of {"Key": "string", "Value": "string"} dicts as
174
211
tags.
212
+ parallelism_config (Optional[ParallelismConfiguration]): Parallelism configuration
213
+ that is applied to each of the executions of the pipeline.
175
214
176
215
Returns:
177
216
response dict from service
178
217
"""
179
218
try :
180
- response = self .create (role_arn , description , tags )
219
+ response = self .create (role_arn , description , tags , parallelism_config )
181
220
except ClientError as e :
182
221
error = e .response ["Error" ]
183
222
if (
@@ -215,6 +254,7 @@ def start(
215
254
parameters : Dict [str , Union [str , bool , int , float ]] = None ,
216
255
execution_display_name : str = None ,
217
256
execution_description : str = None ,
257
+ parallelism_config : ParallelismConfiguration = None ,
218
258
):
219
259
"""Starts a Pipeline execution in the Workflow service.
220
260
@@ -223,6 +263,9 @@ def start(
223
263
pipeline parameters.
224
264
execution_display_name (str): The display name of the pipeline execution.
225
265
execution_description (str): A description of the execution.
266
+ parallelism_config (Optional[ParallelismConfiguration]): Parallelism configuration
267
+ that is applied to each of the executions of the pipeline. It takes precedence
268
+ over the parallelism configuration of the parent pipeline.
226
269
227
270
Returns:
228
271
A `_PipelineExecution` instance, if successful.
@@ -245,6 +288,7 @@ def start(
245
288
PipelineParameters = format_start_parameters (parameters ),
246
289
PipelineExecutionDescription = execution_description ,
247
290
PipelineExecutionDisplayName = execution_display_name ,
291
+ ParallelismConfiguration = parallelism_config ,
248
292
)
249
293
response = self .sagemaker_session .sagemaker_client .start_pipeline_execution (** kwargs )
250
294
return _PipelineExecution (
0 commit comments