Skip to content

Commit 5a488fa

Browse files
authored
Merge branch 'master' into sparse_as_dense_paam_update
2 parents 861fd34 + abe8399 commit 5a488fa

13 files changed

+483
-66
lines changed

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
# Changelog
22

3+
## v2.41.0 (2021-05-17)
4+
5+
### Features
6+
7+
* add pipeline experiment config
8+
* add data wrangler processor
9+
* support RetryStrategy for training jobs
10+
11+
### Bug Fixes and Other Changes
12+
13+
* fix repack pipeline step by putting inference.py in "code" sub dir
14+
* add data wrangler image uri
15+
* fix black-check errors
16+
317
## v2.40.0 (2021-05-11)
418

519
### Features

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.40.1.dev0
1+
2.41.1.dev0

doc/overview.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ Here are examples of how to use Amazon FSx for Lustre as input for training:
374374
375375
file_system_input = FileSystemInput(file_system_id='fs-2',
376376
file_system_type='FSxLustre',
377-
directory_path='/fsx/tensorflow',
377+
directory_path='/<mount-id>/tensorflow',
378378
file_system_access_mode='ro')
379379
380380
# Start an Amazon SageMaker training job with FSx using the FileSystemInput class
@@ -394,7 +394,7 @@ Here are examples of how to use Amazon FSx for Lustre as input for training:
394394
395395
records = FileSystemRecordSet(file_system_id='fs-2',
396396
file_system_type='FSxLustre',
397-
directory_path='/fsx/kmeans',
397+
directory_path='/<mount-id>/kmeans',
398398
num_records=784,
399399
feature_dim=784)
400400

src/sagemaker/workflow/conditions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@ def to_request(self) -> RequestType:
186186
"""Get the request structure for workflow service calls."""
187187
return {
188188
"Type": self.condition_type.value,
189-
"Value": self.value.expr,
190-
"In": [primitive_or_expr(in_value) for in_value in self.in_values],
189+
"QueryValue": self.value.expr,
190+
"Values": [primitive_or_expr(in_value) for in_value in self.in_values],
191191
}
192192

193193

src/sagemaker/workflow/execution_variables.py

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,56 +13,27 @@
1313
"""Pipeline parameters and conditions for workflow."""
1414
from __future__ import absolute_import
1515

16-
from typing import Dict
17-
1816
from sagemaker.workflow.entities import (
19-
Entity,
17+
Expression,
2018
RequestType,
2119
)
2220

2321

24-
class ExecutionVariable(Entity, str):
22+
class ExecutionVariable(Expression):
2523
"""Pipeline execution variables for workflow."""
2624

27-
def __new__(cls, *args, **kwargs): # pylint: disable=unused-argument
28-
"""Subclass str"""
29-
value = ""
30-
if len(args) == 1:
31-
value = args[0] or value
32-
elif kwargs:
33-
value = kwargs.get("name", value)
34-
return str.__new__(cls, ExecutionVariable._expr(value))
35-
3625
def __init__(self, name: str):
3726
"""Create a pipeline execution variable.
3827
3928
Args:
4029
name (str): The name of the execution variable.
4130
"""
42-
super(ExecutionVariable, self).__init__()
4331
self.name = name
4432

45-
def __hash__(self):
46-
"""Hash function for execution variable types"""
47-
return hash(tuple(self.to_request()))
48-
49-
def to_request(self) -> RequestType:
50-
"""Get the request structure for workflow service calls."""
51-
return self.expr
52-
5333
@property
54-
def expr(self) -> Dict[str, str]:
34+
def expr(self) -> RequestType:
5535
"""The 'Get' expression dict for an `ExecutionVariable`."""
56-
return ExecutionVariable._expr(self.name)
57-
58-
@classmethod
59-
def _expr(cls, name):
60-
"""An internal classmethod for the 'Get' expression dict for an `ExecutionVariable`.
61-
62-
Args:
63-
name (str): The name of the execution variable.
64-
"""
65-
return {"Get": f"Execution.{name}"}
36+
return {"Get": f"Execution.{self.name}"}
6637

6738

6839
class ExecutionVariables:

src/sagemaker/workflow/pipeline.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import json
1717

1818
from copy import deepcopy
19-
from typing import Any, Dict, List, Sequence, Union
19+
from typing import Any, Dict, List, Sequence, Union, Optional
2020

2121
import attr
2222
import botocore
@@ -30,7 +30,9 @@
3030
Expression,
3131
RequestType,
3232
)
33+
from sagemaker.workflow.execution_variables import ExecutionVariables
3334
from sagemaker.workflow.parameters import Parameter
35+
from sagemaker.workflow.pipeline_experiment_config import PipelineExperimentConfig
3436
from sagemaker.workflow.properties import Properties
3537
from sagemaker.workflow.steps import Step
3638
from sagemaker.workflow.step_collections import StepCollection
@@ -44,6 +46,12 @@ class Pipeline(Entity):
4446
Attributes:
4547
name (str): The name of the pipeline.
4648
parameters (Sequence[Parameters]): The list of the parameters.
49+
pipeline_experiment_config (Optional[PipelineExperimentConfig]): If set,
50+
the workflow will attempt to create an experiment and trial before
51+
executing the steps. Creation will be skipped if an experiment or a trial with
52+
the same name already exists. By default, pipeline name is used as
53+
experiment name and execution id is used as the trial name.
54+
If set to None, no experiment or trial will be created automatically.
4755
steps (Sequence[Steps]): The list of the non-conditional steps associated with the pipeline.
4856
Any steps that are within the
4957
`if_steps` or `else_steps` of a `ConditionStep` cannot be listed in the steps of a
@@ -57,6 +65,11 @@ class Pipeline(Entity):
5765

5866
name: str = attr.ib(factory=str)
5967
parameters: Sequence[Parameter] = attr.ib(factory=list)
68+
pipeline_experiment_config: Optional[PipelineExperimentConfig] = attr.ib(
69+
default=PipelineExperimentConfig(
70+
ExecutionVariables.PIPELINE_NAME, ExecutionVariables.PIPELINE_EXECUTION_ID
71+
)
72+
)
6073
steps: Sequence[Union[Step, StepCollection]] = attr.ib(factory=list)
6174
sagemaker_session: Session = attr.ib(factory=Session)
6275

@@ -69,22 +82,23 @@ def to_request(self) -> RequestType:
6982
"Version": self._version,
7083
"Metadata": self._metadata,
7184
"Parameters": list_to_request(self.parameters),
85+
"PipelineExperimentConfig": self.pipeline_experiment_config.to_request()
86+
if self.pipeline_experiment_config is not None
87+
else None,
7288
"Steps": list_to_request(self.steps),
7389
}
7490

7591
def create(
7692
self,
7793
role_arn: str,
7894
description: str = None,
79-
experiment_name: str = None,
8095
tags: List[Dict[str, str]] = None,
8196
) -> Dict[str, Any]:
8297
"""Creates a Pipeline in the Pipelines service.
8398
8499
Args:
85100
role_arn (str): The role arn that is assumed by the pipeline to create step artifacts.
86101
description (str): A description of the pipeline.
87-
experiment_name (str): The name of the experiment.
88102
tags (List[Dict[str, str]]): A list of {"Key": "string", "Value": "string"} dicts as
89103
tags.
90104
@@ -96,7 +110,6 @@ def create(
96110
kwargs = self._create_args(role_arn, description)
97111
update_args(
98112
kwargs,
99-
ExperimentName=experiment_name,
100113
Tags=tags,
101114
)
102115
return self.sagemaker_session.sagemaker_client.create_pipeline(**kwargs)
@@ -106,7 +119,7 @@ def _create_args(self, role_arn: str, description: str):
106119
107120
Args:
108121
role_arn (str): The role arn that is assumed by pipelines to create step artifacts.
109-
pipeline_description (str): A description of the pipeline.
122+
description (str): A description of the pipeline.
110123
111124
Returns:
112125
A keyword argument dict for calling create_pipeline.
@@ -147,23 +160,21 @@ def upsert(
147160
self,
148161
role_arn: str,
149162
description: str = None,
150-
experiment_name: str = None,
151163
tags: List[Dict[str, str]] = None,
152164
) -> Dict[str, Any]:
153165
"""Creates a pipeline or updates it, if it already exists.
154166
155167
Args:
156168
role_arn (str): The role arn that is assumed by workflow to create step artifacts.
157-
pipeline_description (str): A description of the pipeline.
158-
experiment_name (str): The name of the experiment.
169+
description (str): A description of the pipeline.
159170
tags (List[Dict[str, str]]): A list of {"Key": "string", "Value": "string"} dicts as
160171
tags.
161172
162173
Returns:
163174
response dict from service
164175
"""
165176
try:
166-
response = self.create(role_arn, description, experiment_name, tags)
177+
response = self.create(role_arn, description, tags)
167178
except ClientError as e:
168179
error = e.response["Error"]
169180
if (
@@ -224,6 +235,9 @@ def start(
224235
def definition(self) -> str:
225236
"""Converts a request structure to string representation for workflow service calls."""
226237
request_dict = self.to_request()
238+
request_dict["PipelineExperimentConfig"] = interpolate(
239+
request_dict["PipelineExperimentConfig"]
240+
)
227241
request_dict["Steps"] = interpolate(request_dict["Steps"])
228242

229243
return json.dumps(request_dict)
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
"""Pipeline experiment config for SageMaker pipeline."""
14+
from __future__ import absolute_import
15+
16+
from typing import Union
17+
18+
from sagemaker.workflow.parameters import Parameter
19+
from sagemaker.workflow.execution_variables import ExecutionVariable
20+
from sagemaker.workflow.entities import (
21+
Entity,
22+
Expression,
23+
RequestType,
24+
)
25+
26+
27+
class PipelineExperimentConfig(Entity):
28+
"""Experiment config for SageMaker pipeline."""
29+
30+
def __init__(
31+
self,
32+
experiment_name: Union[str, Parameter, ExecutionVariable, Expression],
33+
trial_name: Union[str, Parameter, ExecutionVariable, Expression],
34+
):
35+
"""Create a PipelineExperimentConfig
36+
37+
Args:
38+
experiment_name: the name of the experiment that will be created
39+
trial_name: the name of the trial that will be created
40+
"""
41+
self.experiment_name = experiment_name
42+
self.trial_name = trial_name
43+
44+
def to_request(self) -> RequestType:
45+
"""Returns: the request structure."""
46+
47+
return {
48+
"ExperimentName": self.experiment_name,
49+
"TrialName": self.trial_name,
50+
}
51+
52+
53+
class PipelineExperimentConfigProperty(Expression):
54+
"""Reference to pipeline experiment config property."""
55+
56+
def __init__(self, name: str):
57+
"""Create a reference to pipeline experiment property.
58+
59+
Args:
60+
name (str): The name of the pipeline experiment config property.
61+
"""
62+
super(PipelineExperimentConfigProperty, self).__init__()
63+
self.name = name
64+
65+
@property
66+
def expr(self) -> RequestType:
67+
"""The 'Get' expression dict for a pipeline experiment config property."""
68+
69+
return {"Get": f"PipelineExperimentConfig.{self.name}"}
70+
71+
72+
class PipelineExperimentConfigProperties:
73+
"""Enum-like class for all pipeline experiment config property references."""
74+
75+
EXPERIMENT_NAME = PipelineExperimentConfigProperty("ExperimentName")
76+
TRIAL_NAME = PipelineExperimentConfigProperty("TrialName")

tests/integ/test_workflow.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from sagemaker.sklearn.estimator import SKLearn
3939
from sagemaker.sklearn.processing import SKLearnProcessor
4040
from sagemaker.spark.processing import PySparkProcessor, SparkJarProcessor
41-
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
41+
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo, ConditionIn
4242
from sagemaker.workflow.condition_step import ConditionStep
4343
from sagemaker.wrangler.processing import DataWranglerProcessor
4444
from sagemaker.dataset_definition.inputs import DatasetDefinition, AthenaDatasetDefinition
@@ -696,6 +696,7 @@ def test_conditional_pytorch_training_model_registration(
696696
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
697697
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
698698
good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)
699+
in_condition_input = ParameterString(name="Foo", default_value="Foo")
699700

700701
pytorch_estimator = PyTorch(
701702
entry_point=entry_point,
@@ -741,14 +742,17 @@ def test_conditional_pytorch_training_model_registration(
741742

742743
step_cond = ConditionStep(
743744
name="cond-good-enough",
744-
conditions=[ConditionGreaterThanOrEqualTo(left=good_enough_input, right=1)],
745+
conditions=[
746+
ConditionGreaterThanOrEqualTo(left=good_enough_input, right=1),
747+
ConditionIn(value=in_condition_input, in_values=["foo", "bar"]),
748+
],
745749
if_steps=[step_train, step_register],
746750
else_steps=[step_model],
747751
)
748752

749753
pipeline = Pipeline(
750754
name=pipeline_name,
751-
parameters=[good_enough_input, instance_count, instance_type],
755+
parameters=[in_condition_input, good_enough_input, instance_count, instance_type],
752756
steps=[step_cond],
753757
sagemaker_session=sagemaker_session,
754758
)

0 commit comments

Comments
 (0)