|
37 | 37 | from sagemaker.workflow.pipeline_experiment_config import PipelineExperimentConfig
|
38 | 38 | from sagemaker.workflow.parallelism_config import ParallelismConfiguration
|
39 | 39 | from sagemaker.workflow.properties import Properties
|
40 |
| -from sagemaker.workflow.steps import Step |
| 40 | +from sagemaker.workflow.steps import Step, StepTypeEnum |
41 | 41 | from sagemaker.workflow.step_collections import StepCollection
|
42 | 42 | from sagemaker.workflow.condition_step import ConditionStep
|
43 | 43 | from sagemaker.workflow.utilities import list_to_request
|
44 | 44 |
|
| 45 | +_DEFAULT_EXPERIMENT_CFG = PipelineExperimentConfig( |
| 46 | + ExecutionVariables.PIPELINE_NAME, ExecutionVariables.PIPELINE_EXECUTION_ID |
| 47 | +) |
| 48 | + |
45 | 49 |
|
46 |
| -@attr.s |
47 | 50 | class Pipeline(Entity):
|
48 |
| - """Pipeline for workflow. |
| 51 | + """Pipeline for workflow.""" |
49 | 52 |
|
50 |
| - Attributes: |
51 |
| - name (str): The name of the pipeline. |
52 |
| - parameters (Sequence[Parameter]): The list of the parameters. |
53 |
| - pipeline_experiment_config (Optional[PipelineExperimentConfig]): If set, |
54 |
| - the workflow will attempt to create an experiment and trial before |
55 |
| - executing the steps. Creation will be skipped if an experiment or a trial with |
56 |
| - the same name already exists. By default, pipeline name is used as |
57 |
| - experiment name and execution id is used as the trial name. |
58 |
| - If set to None, no experiment or trial will be created automatically. |
59 |
| - steps (Sequence[Union[Step, StepCollection]]): The list of the non-conditional steps |
60 |
| - associated with the pipeline. Any steps that are within the |
61 |
| - `if_steps` or `else_steps` of a `ConditionStep` cannot be listed in the steps of a |
62 |
| - pipeline. Of particular note, the workflow service rejects any pipeline definitions that |
63 |
| - specify a step in the list of steps of a pipeline and that step in the `if_steps` or |
64 |
| - `else_steps` of any `ConditionStep`. |
65 |
| - sagemaker_session (sagemaker.session.Session): Session object that manages interactions |
66 |
| - with Amazon SageMaker APIs and any other AWS services needed. If not specified, the |
67 |
| - pipeline creates one using the default AWS configuration chain. |
68 |
| - """ |
| 53 | + def __init__( |
| 54 | + self, |
| 55 | + name: str = "", |
| 56 | + parameters: Optional[Sequence[Parameter]] = None, |
| 57 | + pipeline_experiment_config: Optional[PipelineExperimentConfig] = _DEFAULT_EXPERIMENT_CFG, |
| 58 | + steps: Optional[Sequence[Union[Step, StepCollection]]] = None, |
| 59 | + sagemaker_session: Optional[Session] = None, |
| 60 | + ): |
| 61 | + """Initialize a Pipeline |
69 | 62 |
|
70 |
| - name: str = attr.ib(factory=str) |
71 |
| - parameters: Sequence[Parameter] = attr.ib(factory=list) |
72 |
| - pipeline_experiment_config: Optional[PipelineExperimentConfig] = attr.ib( |
73 |
| - default=PipelineExperimentConfig( |
74 |
| - ExecutionVariables.PIPELINE_NAME, ExecutionVariables.PIPELINE_EXECUTION_ID |
75 |
| - ) |
76 |
| - ) |
77 |
| - steps: Sequence[Union[Step, StepCollection]] = attr.ib(factory=list) |
78 |
| - sagemaker_session: Session = attr.ib(factory=Session) |
| 63 | + Args: |
| 64 | + name (str): The name of the pipeline. |
| 65 | + parameters (Sequence[Parameter]): The list of the parameters. |
| 66 | + pipeline_experiment_config (Optional[PipelineExperimentConfig]): If set, |
| 67 | + the workflow will attempt to create an experiment and trial before |
| 68 | + executing the steps. Creation will be skipped if an experiment or a trial with |
| 69 | + the same name already exists. By default, pipeline name is used as |
| 70 | + experiment name and execution id is used as the trial name. |
| 71 | + If set to None, no experiment or trial will be created automatically. |
| 72 | + steps (Sequence[Union[Step, StepCollection]]): The list of the non-conditional steps |
| 73 | + associated with the pipeline. Any steps that are within the |
| 74 | + `if_steps` or `else_steps` of a `ConditionStep` cannot be listed in the steps of a |
| 75 | + pipeline. Of particular note, the workflow service rejects any pipeline definitions |
| 76 | + that specify a step in the list of steps of a pipeline and that step in the |
| 77 | + `if_steps` or `else_steps` of any `ConditionStep`. |
| 78 | + sagemaker_session (sagemaker.session.Session): Session object that manages interactions |
| 79 | + with Amazon SageMaker APIs and any other AWS services needed. If not specified, the |
| 80 | + pipeline creates one using the default AWS configuration chain. |
| 81 | + """ |
| 82 | + self.name = name |
| 83 | + self.parameters = parameters if parameters else [] |
| 84 | + self.pipeline_experiment_config = pipeline_experiment_config |
| 85 | + self.steps = steps if steps else [] |
| 86 | + self.sagemaker_session = sagemaker_session if sagemaker_session else Session() |
79 | 87 |
|
80 |
| - _version: str = "2020-12-01" |
81 |
| - _metadata: Dict[str, Any] = dict() |
| 88 | + self._version = "2020-12-01" |
| 89 | + self._metadata = dict() |
| 90 | + self._step_map = dict() |
| 91 | + _generate_step_map(self.steps, self._step_map) |
82 | 92 |
|
83 | 93 | def to_request(self) -> RequestType:
|
84 | 94 | """Gets the request structure for workflow service calls."""
|
@@ -193,6 +203,8 @@ def update(
|
193 | 203 | Returns:
|
194 | 204 | A response dict from the service.
|
195 | 205 | """
|
| 206 | + self._step_map = dict() |
| 207 | + _generate_step_map(self.steps, self._step_map) |
196 | 208 | kwargs = self._create_args(role_arn, description, parallelism_config)
|
197 | 209 | return self.sagemaker_session.sagemaker_client.update_pipeline(**kwargs)
|
198 | 210 |
|
@@ -305,23 +317,27 @@ def definition(self) -> str:
|
305 | 317 |
|
306 | 318 | return json.dumps(request_dict)
|
307 | 319 |
|
308 |
| - def _interpolate_step_collection_name_in_depends_on(self, step_requests: dict): |
| 320 | + def _interpolate_step_collection_name_in_depends_on(self, step_requests: list): |
309 | 321 | """Insert step names as per `StepCollection` name in depends_on list
|
310 | 322 |
|
311 | 323 | Args:
|
312 |
| - step_requests (dict): The raw step request dict without any interpolation. |
| 324 | + step_requests (list): The list of raw step request dicts without any interpolation. |
313 | 325 | """
|
314 |
| - step_name_map = {s.name: s for s in self.steps} |
315 | 326 | for step_request in step_requests:
|
316 |
| - if not step_request.get("DependsOn", None): |
317 |
| - continue |
318 | 327 | depends_on = []
|
319 |
| - for depend_step_name in step_request["DependsOn"]: |
320 |
| - if isinstance(step_name_map[depend_step_name], StepCollection): |
321 |
| - depends_on.extend([s.name for s in step_name_map[depend_step_name].steps]) |
| 328 | + for depend_step_name in step_request.get("DependsOn", []): |
| 329 | + if isinstance(self._step_map[depend_step_name], StepCollection): |
| 330 | + depends_on.extend([s.name for s in self._step_map[depend_step_name].steps]) |
322 | 331 | else:
|
323 | 332 | depends_on.append(depend_step_name)
|
324 |
| - step_request["DependsOn"] = depends_on |
| 333 | + if depends_on: |
| 334 | + step_request["DependsOn"] = depends_on |
| 335 | + |
| 336 | + if step_request["Type"] == StepTypeEnum.CONDITION.value: |
| 337 | + sub_step_requests = ( |
| 338 | + step_request["Arguments"]["IfSteps"] + step_request["Arguments"]["ElseSteps"] |
| 339 | + ) |
| 340 | + self._interpolate_step_collection_name_in_depends_on(sub_step_requests) |
325 | 341 |
|
326 | 342 |
|
327 | 343 | def format_start_parameters(parameters: Dict[str, Any]) -> List[Dict[str, Any]]:
|
@@ -448,6 +464,20 @@ def update_args(args: Dict[str, Any], **kwargs):
|
448 | 464 | args.update({key: value})
|
449 | 465 |
|
450 | 466 |
|
| 467 | +def _generate_step_map( |
| 468 | + steps: Sequence[Union[Step, StepCollection]], step_map: dict |
| 469 | +) -> Dict[str, Any]: |
| 470 | + """Helper method to create a mapping from Step/Step Collection name to itself.""" |
| 471 | + for step in steps: |
| 472 | + if step.name in step_map: |
| 473 | + raise ValueError("Pipeline steps cannot have duplicate names.") |
| 474 | + step_map[step.name] = step |
| 475 | + if isinstance(step, ConditionStep): |
| 476 | + _generate_step_map(step.if_steps + step.else_steps, step_map) |
| 477 | + if isinstance(step, StepCollection): |
| 478 | + _generate_step_map(step.steps, step_map) |
| 479 | + |
| 480 | + |
451 | 481 | @attr.s
|
452 | 482 | class _PipelineExecution:
|
453 | 483 | """Internal class for encapsulating pipeline execution instances.
|
@@ -547,22 +577,11 @@ class PipelineGraph:
|
547 | 577 |
|
548 | 578 | def __init__(self, steps: Sequence[Union[Step, StepCollection]]):
|
549 | 579 | self.step_map = {}
|
550 |
| - self._generate_step_map(steps) |
| 580 | + _generate_step_map(steps, self.step_map) |
551 | 581 | self.adjacency_list = self._initialize_adjacency_list()
|
552 | 582 | if self.is_cyclic():
|
553 | 583 | raise ValueError("Cycle detected in pipeline step graph.")
|
554 | 584 |
|
555 |
| - def _generate_step_map(self, steps: Sequence[Union[Step, StepCollection]]): |
556 |
| - """Helper method to create a mapping from Step/Step Collection name to itself.""" |
557 |
| - for step in steps: |
558 |
| - if step.name in self.step_map: |
559 |
| - raise ValueError("Pipeline steps cannot have duplicate names.") |
560 |
| - self.step_map[step.name] = step |
561 |
| - if isinstance(step, ConditionStep): |
562 |
| - self._generate_step_map(step.if_steps + step.else_steps) |
563 |
| - if isinstance(step, StepCollection): |
564 |
| - self._generate_step_map(step.steps) |
565 |
| - |
566 | 585 | @classmethod
|
567 | 586 | def from_pipeline(cls, pipeline: Pipeline):
|
568 | 587 | """Create a PipelineGraph object from the Pipeline object."""
|
|
0 commit comments