feature: add RunName to experiment_config (aws#696)

yzhu0 · qidewenwhen · commit d52d42a384f3 · 2022-12-13T19:39:12.000-08:00
diff --git a/src/sagemaker/amazon/amazon_estimator.py b/src/sagemaker/amazon/amazon_estimator.py
@@ -242,8 +242,8 @@ def fit(
                 generates a default job name, based on the training image name
                 and current timestamp.
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py
@@ -1103,8 +1103,8 @@ def fit(
             job_name (str): Training job name. If not specified, the estimator generates
                 a default job name based on the training image name and current timestamp.
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -2023,8 +2023,8 @@ def start_new(cls, estimator, inputs, experiment_config):
             inputs (str): Parameters used when called
                 :meth:`~sagemaker.estimator.EstimatorBase.fit`.
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -2033,6 +2033,7 @@ def start_new(cls, estimator, inputs, experiment_config):
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * `RunName` is used to record an experiment run.
         Returns:
             sagemaker.estimator._TrainingJob: Constructed object that captures
             all information about the started training job.
@@ -2053,8 +2054,8 @@ def _get_train_args(cls, estimator, inputs, experiment_config):
             inputs (str): Parameters used when called
                 :meth:`~sagemaker.estimator.EstimatorBase.fit`.
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -2063,6 +2064,7 @@ def _get_train_args(cls, estimator, inputs, experiment_config):
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * `RunName` is used to record an experiment run.
 
         Returns:
             Dict: dict for `sagemaker.session.Session.train` method
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
@@ -548,8 +548,8 @@ def train(  # noqa: C901
                 checkpoints will be provided under `/opt/ml/checkpoints/`.
                 (default: ``None``).
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -558,6 +558,7 @@ def train(  # noqa: C901
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * `RunName` is used to record an experiment run.
             enable_sagemaker_metrics (bool): enable SageMaker Metrics Time
                 Series. For more information see:
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_AlgorithmSpecification.html#SageMaker-Type-AlgorithmSpecification-EnableSageMakerMetricsTimeSeries
@@ -703,8 +704,8 @@ def _get_train_request(  # noqa: C901
                 checkpoints will be provided under `/opt/ml/checkpoints/`.
                 (default: ``None``).
             experiment_config (dict[str, str]): Experiment management configuration.
-                Optionally, the dict can contain three keys:
-                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
+                Optionally, the dict can contain four keys:
+                'ExperimentName', 'TrialName', 'TrialComponentDisplayName' and 'RunName'.
                 The behavior of setting these keys is as follows:
                 * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                 automatically created and the job's Trial Component associated with the Trial.
@@ -713,6 +714,7 @@ def _get_train_request(  # noqa: C901
                 * If both `ExperimentName` and `TrialName` are not supplied the trial component
                 will be unassociated.
                 * `TrialComponentDisplayName` is used for display in Studio.
+                * `RunName` is used to record an experiment run.
             enable_sagemaker_metrics (bool): enable SageMaker Metrics Time
                 Series. For more information see:
                 https://docs.aws.amazon.com/sagemaker/latest/dg/API_AlgorithmSpecification.html#SageMaker-Type-AlgorithmSpecification-EnableSageMakerMetricsTimeSeries
diff --git a/tests/integ/sagemaker/experiments/test_run.py b/tests/integ/sagemaker/experiments/test_run.py
@@ -167,6 +167,12 @@ def validate_tc_artifact_association(is_output, expected_artifact_name):
 _PYTHON_SCRIPT_PATH = os.path.join(DATA_DIR, "experiment/scripts/launcher.sh")
 
 
+@pytest.mark.skip(
+    reason=(
+        "Waiting for the CR https://code.amazon.com/reviews/CR-75915367/revisions/1#/details "
+        "to deploy to us-west-2"
+    )
+)
 def test_run_from_local_and_train_job_and_all_exp_cfg_match(sagemaker_session, job_resource_dir):
     # Notes:
     # 1. The 1st Run TC created locally
@@ -208,6 +214,12 @@ def test_run_from_local_and_train_job_and_all_exp_cfg_match(sagemaker_session, j
             )
 
 
+@pytest.mark.skip(
+    reason=(
+        "Waiting for the CR https://code.amazon.com/reviews/CR-75915367/revisions/1#/details "
+        "to deploy to us-west-2"
+    )
+)
 def test_run_from_local_and_train_job_and_exp_cfg_not_match(sagemaker_session, job_resource_dir):
     # Notes:
     # 1. The 1st Run TC created locally
@@ -249,6 +261,12 @@ def test_run_from_local_and_train_job_and_exp_cfg_not_match(sagemaker_session, j
             )
 
 
+@pytest.mark.skip(
+    reason=(
+        "Waiting for the CR https://code.amazon.com/reviews/CR-75915367/revisions/1#/details "
+        "to deploy to us-west-2"
+    )
+)
 def test_run_from_train_job(sagemaker_session, job_resource_dir):
     # Notes:
     # 1. No Run TC created locally or specified in experiment config
diff --git a/tests/unit/sagemaker/huggingface/test_estimator.py b/tests/unit/sagemaker/huggingface/test_estimator.py
@@ -48,6 +48,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 
diff --git a/tests/unit/sagemaker/tensorflow/test_estimator.py b/tests/unit/sagemaker/tensorflow/test_estimator.py
@@ -56,6 +56,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 
diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_pytorch_compiler.py
@@ -52,6 +52,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 
diff --git a/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_huggingface_tensorflow_compiler.py
@@ -50,6 +50,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 
diff --git a/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py b/tests/unit/sagemaker/training_compiler/test_tensorflow_compiler.py
@@ -50,6 +50,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 
diff --git a/tests/unit/test_amazon_estimator.py b/tests/unit/test_amazon_estimator.py
@@ -239,12 +239,18 @@ def test_fit_pass_experiment_config(sagemaker_session):
     labels = [99, 85, 87, 2]
     pca.fit(
         pca.record_set(np.array(train), np.array(labels)),
-        experiment_config={"ExperimentName": "exp"},
+        experiment_config={
+            "ExperimentName": "exp",
+            "RunName": "rn",
+        },
     )
 
     called_args = sagemaker_session.train.call_args
 
-    assert called_args[1]["experiment_config"] == {"ExperimentName": "exp"}
+    assert called_args[1]["experiment_config"] == {
+        "ExperimentName": "exp",
+        "RunName": "rn",
+    }
 
 
 def test_build_shards():
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
@@ -2489,7 +2489,12 @@ def test_start_new(sagemaker_session):
         hyperparameters=hyperparameters,
     )
 
-    exp_config = {"ExperimentName": "exp", "TrialName": "t", "TrialComponentDisplayName": "tc"}
+    exp_config = {
+        "ExperimentName": "exp",
+        "TrialName": "t",
+        "TrialComponentDisplayName": "tc",
+        "RunName": "rn",
+    }
 
     started_training_job = training_job.start_new(estimator, inputs, experiment_config=exp_config)
     called_args = sagemaker_session.train.call_args
@@ -2680,6 +2685,7 @@ def test_unsupported_type_in_dict():
             "ExperimentName": "exp",
             "TrialName": "trial",
             "TrialComponentDisplayName": "tc",
+            "RunName": "rn",
         }
     }
 )
@@ -2884,6 +2890,7 @@ def test_generic_to_fit_with_experiment_config(time, sagemaker_session):
             "ExperimentName": "exp",
             "TrialName": "trial",
             "TrialComponentDisplayName": "tc",
+            "RunName": "rn",
         },
     )
 
diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py
@@ -62,6 +62,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 MODEL_PKG_RESPONSE = {"ModelPackageArn": "arn:model-pkg-arn"}
diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py
@@ -54,6 +54,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 DISTRIBUTION_PYTORCH_DDP_ENABLED = {"pytorchddp": {"enabled": True}}
diff --git a/tests/unit/test_rl.py b/tests/unit/test_rl.py
@@ -49,6 +49,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 
diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py
@@ -700,6 +700,7 @@ def test_training_input_all_arguments():
     "ExperimentName": "dummyExp",
     "TrialName": "dummyT",
     "TrialComponentDisplayName": "dummyTC",
+    "RunName": "dummyRN",
 }
 MODEL_CLIENT_CONFIG = {"InvocationsMaxRetries": 2, "InvocationsTimeoutInSeconds": 60}
 
diff --git a/tests/unit/test_sklearn.py b/tests/unit/test_sklearn.py
@@ -51,6 +51,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 
diff --git a/tests/unit/test_xgboost.py b/tests/unit/test_xgboost.py
@@ -54,6 +54,7 @@
     "ExperimentName": "exp",
     "TrialName": "trial",
     "TrialComponentDisplayName": "tc",
+    "RunName": "rn",
 }
 
 

Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,7 @@`
`48`	`48`	`"ExperimentName": "exp",`
`49`	`49`	`"TrialName": "trial",`
`50`	`50`	`"TrialComponentDisplayName": "tc",`
	`51`	`+ "RunName": "rn",`
`51`	`52`	`}`
`52`	`53`
`53`	`54`
Original file line number	Diff line number	Diff line change
`@@ -56,6 +56,7 @@`
`56`	`56`	`"ExperimentName": "exp",`
`57`	`57`	`"TrialName": "trial",`
`58`	`58`	`"TrialComponentDisplayName": "tc",`
	`59`	`+ "RunName": "rn",`
`59`	`60`	`}`
`60`	`61`
`61`	`62`
Original file line number	Diff line number	Diff line change
`@@ -52,6 +52,7 @@`
`52`	`52`	`"ExperimentName": "exp",`
`53`	`53`	`"TrialName": "trial",`
`54`	`54`	`"TrialComponentDisplayName": "tc",`
	`55`	`+ "RunName": "rn",`
`55`	`56`	`}`
`56`	`57`
`57`	`58`
Original file line number	Diff line number	Diff line change
`@@ -50,6 +50,7 @@`
`50`	`50`	`"ExperimentName": "exp",`
`51`	`51`	`"TrialName": "trial",`
`52`	`52`	`"TrialComponentDisplayName": "tc",`
	`53`	`+ "RunName": "rn",`
`53`	`54`	`}`
`54`	`55`
`55`	`56`
Original file line number	Diff line number	Diff line change
`@@ -2489,7 +2489,12 @@ def test_start_new(sagemaker_session):`
`2489`	`2489`	`hyperparameters=hyperparameters,`
`2490`	`2490`	`)`
`2491`	`2491`
`2492`		`- exp_config = {"ExperimentName": "exp", "TrialName": "t", "TrialComponentDisplayName": "tc"}`
	`2492`	`+ exp_config = {`
	`2493`	`+ "ExperimentName": "exp",`
	`2494`	`+ "TrialName": "t",`
	`2495`	`+ "TrialComponentDisplayName": "tc",`
	`2496`	`+ "RunName": "rn",`
	`2497`	`+ }`
`2493`	`2498`
`2494`	`2499`	`started_training_job = training_job.start_new(estimator, inputs, experiment_config=exp_config)`
`2495`	`2500`	`called_args = sagemaker_session.train.call_args`
`@@ -2680,6 +2685,7 @@ def test_unsupported_type_in_dict():`
`2680`	`2685`	`"ExperimentName": "exp",`
`2681`	`2686`	`"TrialName": "trial",`
`2682`	`2687`	`"TrialComponentDisplayName": "tc",`
	`2688`	`+ "RunName": "rn",`
`2683`	`2689`	`}`
`2684`	`2690`	`}`
`2685`	`2691`	`)`
`@@ -2884,6 +2890,7 @@ def test_generic_to_fit_with_experiment_config(time, sagemaker_session):`
`2884`	`2890`	`"ExperimentName": "exp",`
`2885`	`2891`	`"TrialName": "trial",`
`2886`	`2892`	`"TrialComponentDisplayName": "tc",`
	`2893`	`+ "RunName": "rn",`
`2887`	`2894`	`},`
`2888`	`2895`	`)`
`2889`	`2896`
Original file line number	Diff line number	Diff line change
`@@ -62,6 +62,7 @@`
`62`	`62`	`"ExperimentName": "exp",`
`63`	`63`	`"TrialName": "trial",`
`64`	`64`	`"TrialComponentDisplayName": "tc",`
	`65`	`+ "RunName": "rn",`
`65`	`66`	`}`
`66`	`67`
`67`	`68`	`MODEL_PKG_RESPONSE = {"ModelPackageArn": "arn:model-pkg-arn"}`
Original file line number	Diff line number	Diff line change
`@@ -54,6 +54,7 @@`
`54`	`54`	`"ExperimentName": "exp",`
`55`	`55`	`"TrialName": "trial",`
`56`	`56`	`"TrialComponentDisplayName": "tc",`
	`57`	`+ "RunName": "rn",`
`57`	`58`	`}`
`58`	`59`
`59`	`60`	`DISTRIBUTION_PYTORCH_DDP_ENABLED = {"pytorchddp": {"enabled": True}}`
Original file line number	Diff line number	Diff line change
`@@ -49,6 +49,7 @@`
`49`	`49`	`"ExperimentName": "exp",`
`50`	`50`	`"TrialName": "trial",`
`51`	`51`	`"TrialComponentDisplayName": "tc",`
	`52`	`+ "RunName": "rn",`
`52`	`53`	`}`
`53`	`54`
`54`	`55`
Original file line number	Diff line number	Diff line change
`@@ -700,6 +700,7 @@ def test_training_input_all_arguments():`
`700`	`700`	`"ExperimentName": "dummyExp",`
`701`	`701`	`"TrialName": "dummyT",`
`702`	`702`	`"TrialComponentDisplayName": "dummyTC",`
	`703`	`+ "RunName": "dummyRN",`
`703`	`704`	`}`
`704`	`705`	`MODEL_CLIENT_CONFIG = {"InvocationsMaxRetries": 2, "InvocationsTimeoutInSeconds": 60}`
`705`	`706`
Original file line number	Diff line number	Diff line change
`@@ -51,6 +51,7 @@`
`51`	`51`	`"ExperimentName": "exp",`
`52`	`52`	`"TrialName": "trial",`
`53`	`53`	`"TrialComponentDisplayName": "tc",`
	`54`	`+ "RunName": "rn",`
`54`	`55`	`}`
`55`	`56`
`56`	`57`