Allow users to customize trial component display names for pipeline-launched jobs

Zhankuil · Zhankuil · commit eaf35a774bcd · 2022-07-12T11:08:23.000-07:00
diff --git a/doc/amazon_sagemaker_model_building_pipeline.rst b/doc/amazon_sagemaker_model_building_pipeline.rst
@@ -741,6 +741,8 @@ There are a number of properties for a pipeline execution that can only be resol
 - :class:`sagemaker.workflow.execution_variables.ExecutionVariables.PIPELINE_EXECUTION_ARN`: The execution ARN for an execution.
 - :class:`sagemaker.workflow.execution_variables.ExecutionVariables.PIPELINE_NAME`: The name of the pipeline.
 - :class:`sagemaker.workflow.execution_variables.ExecutionVariables.PIPELINE_ARN`: The ARN of the pipeline.
+- :class:`sagemaker.workflow.execution_variables.ExecutionVariables.TRAINING_JOB_NAME`: The name of the training job launched by the training step.
+- :class:`sagemaker.workflow.execution_variables.ExecutionVariables.PROCESSING_JOB_NAME`: The name of the processing job launched by the processing step.
 
 You can use these execution variables as you see fit. The following example uses the :code:`START_DATETIME` execution variable to construct a processing output path:
 
diff --git a/doc/workflows/pipelines/sagemaker.workflow.pipelines.rst b/doc/workflows/pipelines/sagemaker.workflow.pipelines.rst
@@ -52,7 +52,7 @@ Execution Variables
 .. autoclass:: sagemaker.workflow.execution_variables.ExecutionVariable
 
 .. autoclass:: sagemaker.workflow.execution_variables.ExecutionVariables
-    :members: START_DATETIME, CURRENT_DATETIME, PIPELINE_EXECUTION_ID, PIPELINE_EXECUTION_ARN, PIPELINE_NAME, PIPELINE_ARN
+    :members: START_DATETIME, CURRENT_DATETIME, PIPELINE_EXECUTION_ID, PIPELINE_EXECUTION_ARN, PIPELINE_NAME, PIPELINE_ARN, TRAINING_JOB_NAME, PROCESSING_JOB_NAME
 
 Functions
 ---------
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
@@ -1000,6 +1000,12 @@ def fit(
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * Both `ExperimentName` and `TrialName` will be ignored if the Estimator instance
+                is built with :class:`~sagemaker.workflow.pipeline_context.PipelineSession`.
+                However, the value of `TrialComponentDisplayName` is honored for display in Studio.
+        Returns:
+            The pipeline step arguments if the Estimator instance is built with
+            :class:`~sagemaker.workflow.pipeline_context.PipelineSession`; otherwise None.
         """
         self._prepare_for_training(job_name=job_name)
 
diff --git a/src/sagemaker/processing.py b/src/sagemaker/processing.py
@@ -173,9 +173,14 @@ def run(
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * Both `ExperimentName` and `TrialName` will be ignored if the Processor instance
+                is built with :class:`~sagemaker.workflow.pipeline_context.PipelineSession`.
+                However, the value of `TrialComponentDisplayName` is honored for display in Studio.
             kms_key (str): The ARN of the KMS key that is used to encrypt the
                 user code file (default: None).
-
+        Returns:
+            The pipeline step arguments if the Processor instance is built with
+            :class:`~sagemaker.workflow.pipeline_context.PipelineSession`; otherwise None.
         Raises:
             ValueError: if ``logs`` is True but ``wait`` is False.
         """
@@ -543,8 +548,14 @@ def run(
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * Both `ExperimentName` and `TrialName` will be ignored if the Processor instance
+                is built with :class:`~sagemaker.workflow.pipeline_context.PipelineSession`.
+                However, the value of `TrialComponentDisplayName` is honored for display in Studio.
             kms_key (str): The ARN of the KMS key that is used to encrypt the
                 user code file (default: None).
+        Returns:
+            The pipeline step arguments if the Processor instance is built with
+            :class:`~sagemaker.workflow.pipeline_context.PipelineSession`; otherwise None.
         """
         normalized_inputs, normalized_outputs = self._normalize_args(
             job_name=job_name,
@@ -1601,8 +1612,14 @@ def run(  # type: ignore[override]
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * Both `ExperimentName` and `TrialName` will be ignored if the Processor instance
+                is built with :class:`~sagemaker.workflow.pipeline_context.PipelineSession`.
+                However, the value of `TrialComponentDisplayName` is honored for display in Studio.
             kms_key (str): The ARN of the KMS key that is used to encrypt the
                 user code file (default: None).
+        Returns:
+            The pipeline step arguments if the Processor instance is built with
+            :class:`~sagemaker.workflow.pipeline_context.PipelineSession`; otherwise None.
         """
         s3_runproc_sh, inputs, job_name = self._pack_and_upload_code(
             code, source_dir, dependencies, git_config, job_name, inputs
diff --git a/src/sagemaker/transformer.py b/src/sagemaker/transformer.py
@@ -186,6 +186,9 @@ def transform(
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * Both `ExperimentName` and `TrialName` will be ignored if the Transformer instance
+                is built with :class:`~sagemaker.workflow.pipeline_context.PipelineSession`.
+                However, the value of `TrialComponentDisplayName` is honored for display in Studio.
             model_client_config (dict[str, str]): Model configuration.
                 Dictionary contains two optional keys,
                 'InvocationsTimeoutInSeconds', and 'InvocationsMaxRetries'.
@@ -194,6 +197,11 @@ def transform(
                 (default: ``True``).
             logs (bool): Whether to show the logs produced by the job.
                 Only meaningful when wait is ``True`` (default: ``True``).
+            kms_key (str): The ARN of the KMS key that is used to encrypt the
+                user code file (default: None).
+        Returns:
+            The pipeline step arguments if the Transformer instance is built with
+            :class:`~sagemaker.workflow.pipeline_context.PipelineSession`; otherwise None.
         """
         local_mode = self.sagemaker_session.local_mode
         if not local_mode and not is_pipeline_variable(data) and not data.startswith("s3://"):
diff --git a/src/sagemaker/workflow/execution_variables.py b/src/sagemaker/workflow/execution_variables.py
@@ -58,6 +58,8 @@ class ExecutionVariables:
     - ExecutionVariables.PIPELINE_ARN
     - ExecutionVariables.PIPELINE_EXECUTION_ID
     - ExecutionVariables.PIPELINE_EXECUTION_ARN
+    - ExecutionVariables.TRAINING_JOB_NAME
+    - ExecutionVariables.PROCESSING_JOB_NAME
     """
 
     START_DATETIME = ExecutionVariable("StartDateTime")
@@ -66,3 +68,5 @@ class ExecutionVariables:
     PIPELINE_ARN = ExecutionVariable("PipelineArn")
     PIPELINE_EXECUTION_ID = ExecutionVariable("PipelineExecutionId")
     PIPELINE_EXECUTION_ARN = ExecutionVariable("PipelineExecutionArn")
+    TRAINING_JOB_NAME = ExecutionVariable("TrainingJobName")
+    PROCESSING_JOB_NAME = ExecutionVariable("ProcessingJobName")
diff --git a/src/sagemaker/workflow/steps.py b/src/sagemaker/workflow/steps.py
@@ -223,6 +223,18 @@ def _get_step_name_from_str(
             return step_map[str_input].steps[-1].name
         return str_input
 
+    @staticmethod
+    def _trim_experiment_config(request_dict: Dict):
+        """Trim `ExperimentConfig` to `TrialComponentDisplayName`; drop the config if unset."""
+        if request_dict.get("ExperimentConfig", {}).get("TrialComponentDisplayName"):
+            request_dict["ExperimentConfig"] = {  # rewrites the caller's dict in place
+                "TrialComponentDisplayName": request_dict["ExperimentConfig"][
+                    "TrialComponentDisplayName"
+                ]
+            }
+        else:
+            request_dict.pop("ExperimentConfig", None)  # missing or falsy display name
+
 
 @attr.s
 class CacheConfig:
@@ -429,7 +441,7 @@ def arguments(self) -> RequestType:
             request_dict["HyperParameters"].pop("sagemaker_job_name", None)
 
         request_dict.pop("TrainingJobName", None)
-        request_dict.pop("ExperimentConfig", None)
+        Step._trim_experiment_config(request_dict)
 
         return request_dict
 
@@ -660,7 +672,8 @@ def arguments(self) -> RequestType:
             )
 
         request_dict.pop("TransformJobName", None)
-        request_dict.pop("ExperimentConfig", None)
+        Step._trim_experiment_config(request_dict)
+
         return request_dict
 
     @property
@@ -808,7 +821,8 @@ def arguments(self) -> RequestType:
             request_dict = self.processor.sagemaker_session._get_process_request(**process_args)
 
         request_dict.pop("ProcessingJobName", None)
-        request_dict.pop("ExperimentConfig", None)
+        Step._trim_experiment_config(request_dict)
+
         return request_dict
 
     @property
diff --git a/tests/unit/sagemaker/workflow/test_processing_step.py b/tests/unit/sagemaker/workflow/test_processing_step.py
@@ -18,6 +18,8 @@
 import pytest
 import warnings
 
+from copy import deepcopy
+
 from sagemaker.estimator import Estimator
 from sagemaker.parameter import IntegerParameter
 from sagemaker.transformer import Transformer
@@ -268,7 +270,34 @@ def network_config():
     )
 
 
-def test_processing_step_with_processor(pipeline_session, processing_input):
+@pytest.mark.parametrize(
+    "experiment_config, expected_experiment_config",
+    [
+        (
+            {
+                "ExperimentName": "experiment-name",
+                "TrialName": "trial-name",
+                "TrialComponentDisplayName": "display-name",
+            },
+            {"TrialComponentDisplayName": "display-name"},
+        ),
+        (
+            {"TrialComponentDisplayName": "display-name"},
+            {"TrialComponentDisplayName": "display-name"},
+        ),
+        (
+            {
+                "ExperimentName": "experiment-name",
+                "TrialName": "trial-name",
+            },
+            None,
+        ),
+        (None, None),
+    ],
+)
+def test_processing_step_with_processor(
+    pipeline_session, processing_input, experiment_config, expected_experiment_config
+):
     custom_step1 = CustomStep("TestStep")
     custom_step2 = CustomStep("SecondTestStep")
     processor = Processor(
@@ -280,7 +309,7 @@ def test_processing_step_with_processor(pipeline_session, processing_input):
     )
 
     with warnings.catch_warnings(record=True) as w:
-        step_args = processor.run(inputs=processing_input)
+        step_args = processor.run(inputs=processing_input, experiment_config=experiment_config)
         assert len(w) == 1
         assert issubclass(w[-1].category, UserWarning)
         assert "Running within a PipelineSession" in str(w[-1].message)
@@ -307,13 +336,21 @@ def test_processing_step_with_processor(pipeline_session, processing_input):
         steps=[step, custom_step1, custom_step2],
         sagemaker_session=pipeline_session,
     )
+
+    expected_step_arguments = deepcopy(step_args.args)
+    if expected_experiment_config is None:
+        expected_step_arguments.pop("ExperimentConfig", None)
+    else:
+        expected_step_arguments["ExperimentConfig"] = expected_experiment_config
+    del expected_step_arguments["ProcessingJobName"]
+
     assert json.loads(pipeline.definition())["Steps"][0] == {
         "Name": "MyProcessingStep",
         "Description": "ProcessingStep description",
         "DisplayName": "MyProcessingStep",
         "Type": "Processing",
         "DependsOn": ["TestStep", "SecondTestStep"],
-        "Arguments": step_args.args,
+        "Arguments": expected_step_arguments,
         "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
         "PropertyFiles": [
             {
diff --git a/tests/unit/sagemaker/workflow/test_training_step.py b/tests/unit/sagemaker/workflow/test_training_step.py
@@ -19,6 +19,8 @@
 import pytest
 import warnings
 
+from copy import deepcopy
+
 from sagemaker import Processor, Model
 from sagemaker.parameter import IntegerParameter
 from sagemaker.transformer import Transformer
@@ -200,7 +202,34 @@ def hyperparameters():
     return {"test-key": "test-val"}
 
 
-def test_training_step_with_estimator(pipeline_session, training_input, hyperparameters):
+@pytest.mark.parametrize(
+    "experiment_config, expected_experiment_config",
+    [
+        (
+            {
+                "ExperimentName": "experiment-name",
+                "TrialName": "trial-name",
+                "TrialComponentDisplayName": "display-name",
+            },
+            {"TrialComponentDisplayName": "display-name"},
+        ),
+        (
+            {"TrialComponentDisplayName": "display-name"},
+            {"TrialComponentDisplayName": "display-name"},
+        ),
+        (
+            {
+                "ExperimentName": "experiment-name",
+                "TrialName": "trial-name",
+            },
+            None,
+        ),
+        (None, None),
+    ],
+)
+def test_training_step_with_estimator(
+    pipeline_session, training_input, hyperparameters, experiment_config, expected_experiment_config
+):
     custom_step1 = CustomStep("TestStep")
     custom_step2 = CustomStep("SecondTestStep")
     enable_network_isolation = ParameterBoolean(name="enable_network_isolation")
@@ -217,7 +246,7 @@ def test_training_step_with_estimator(pipeline_session, training_input, hyperpar
     )
 
     with warnings.catch_warnings(record=True) as w:
-        step_args = estimator.fit(inputs=training_input)
+        step_args = estimator.fit(inputs=training_input, experiment_config=experiment_config)
         assert len(w) == 1
         assert issubclass(w[-1].category, UserWarning)
         assert "Running within a PipelineSession" in str(w[-1].message)
@@ -238,17 +267,28 @@ def test_training_step_with_estimator(pipeline_session, training_input, hyperpar
         parameters=[enable_network_isolation, encrypt_container_traffic],
         sagemaker_session=pipeline_session,
     )
-    step_args.args["EnableInterContainerTrafficEncryption"] = {
+
+    expected_step_arguments = deepcopy(step_args.args)
+
+    expected_step_arguments["EnableInterContainerTrafficEncryption"] = {
         "Get": "Parameters.encrypt_container_traffic"
     }
-    step_args.args["EnableNetworkIsolation"] = {"Get": "Parameters.encrypt_container_traffic"}
+    expected_step_arguments["EnableNetworkIsolation"] = {
+        "Get": "Parameters.enable_network_isolation"
+    }
+    if expected_experiment_config is None:
+        expected_step_arguments.pop("ExperimentConfig", None)
+    else:
+        expected_step_arguments["ExperimentConfig"] = expected_experiment_config
+    del expected_step_arguments["TrainingJobName"]
+
     assert json.loads(pipeline.definition())["Steps"][0] == {
         "Name": "MyTrainingStep",
         "Description": "TrainingStep description",
         "DisplayName": "MyTrainingStep",
         "Type": "Training",
         "DependsOn": ["TestStep", "SecondTestStep"],
-        "Arguments": step_args.args,
+        "Arguments": expected_step_arguments,
     }
     assert step.properties.TrainingJobName.expr == {"Get": "Steps.MyTrainingStep.TrainingJobName"}
     adjacency_list = PipelineGraph.from_pipeline(pipeline).adjacency_list
diff --git a/tests/unit/sagemaker/workflow/test_transform_step.py b/tests/unit/sagemaker/workflow/test_transform_step.py