Skip to content

fix: auto ml integ tests and add flaky test markers #4136

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions tests/integ/auto_ml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
TARGET_ATTRIBUTE_NAME = "virginica"


def create_auto_ml_job_if_not_exist(sagemaker_session):
auto_ml_job_name = "python-sdk-integ-test-base-job"

def create_auto_ml_job_if_not_exist(sagemaker_session, auto_ml_job_name):
try:
sagemaker_session.describe_auto_ml_job(job_name=auto_ml_job_name)
except Exception as e: # noqa: F841
Expand Down
85 changes: 49 additions & 36 deletions tests/integ/test_auto_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from sagemaker.utils import unique_name_from_base
from tests.integ import AUTO_ML_DEFAULT_TIMEMOUT_MINUTES, DATA_DIR, auto_ml_utils
from tests.integ.timeout import timeout
from tests.conftest import CUSTOM_S3_OBJECT_KEY_PREFIX

ROLE = "SageMakerRole"
PREFIX = "sagemaker/beta-automl-xgboost"
Expand All @@ -38,8 +39,6 @@
BASE_JOB_NAME = "auto-ml"
MODE = "ENSEMBLING"

# use a succeeded AutoML job to test describe and list candidates method, otherwise tests will run too long
AUTO_ML_JOB_NAME = "python-sdk-integ-test-base-job"
DEFAULT_MODEL_NAME = "python-sdk-automl"


Expand All @@ -49,6 +48,14 @@
}


# use a successfully completed AutoML job to test the describe and list-candidates methods; otherwise tests will run too long
# test-session-job will be created once per session if it doesn't already exist, and will be reused in relevant tests.
@pytest.fixture(scope="module")
def test_session_job_name():
job_name = unique_name_from_base("test-session-job", max_length=32)
return job_name


@pytest.mark.slow_test
@pytest.mark.skipif(
tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
Expand All @@ -63,7 +70,7 @@ def test_auto_ml_fit(sagemaker_session):
max_candidates=1,
)

job_name = unique_name_from_base("auto-ml", max_length=32)
job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
auto_ml.fit(inputs, job_name=job_name)
Expand All @@ -82,7 +89,7 @@ def test_auto_ml_fit_local_input(sagemaker_session):
)

inputs = TRAINING_DATA
job_name = unique_name_from_base("auto-ml", max_length=32)
job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
auto_ml.fit(inputs, job_name=job_name)

Expand All @@ -99,7 +106,7 @@ def test_auto_ml_input_object_fit(sagemaker_session):
max_candidates=1,
generate_candidate_definitions_only=True,
)
job_name = unique_name_from_base("auto-ml", max_length=32)
job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
s3_input = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
inputs = AutoMLInput(inputs=s3_input, target_attribute_name=TARGET_ATTRIBUTE_NAME)
with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
Expand All @@ -118,7 +125,7 @@ def test_auto_ml_input_object_list_fit(sagemaker_session):
max_candidates=1,
mode=MODE,
)
job_name = unique_name_from_base("auto-ml", max_length=32)
job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
s3_input_training = sagemaker_session.upload_data(
path=TRAINING_DATA, key_prefix=PREFIX + "/input"
)
Expand Down Expand Up @@ -178,7 +185,7 @@ def test_auto_ml_invalid_target_attribute(sagemaker_session):
auto_ml = AutoML(
role=ROLE, target_attribute_name="y", sagemaker_session=sagemaker_session, max_candidates=1
)
job_name = unique_name_from_base("auto-ml", max_length=32)
job_name = unique_name_from_base(BASE_JOB_NAME, max_length=32)
inputs = sagemaker_session.upload_data(path=TRAINING_DATA, key_prefix=PREFIX + "/input")
with pytest.raises(
ClientError,
Expand All @@ -192,14 +199,14 @@ def test_auto_ml_invalid_target_attribute(sagemaker_session):
tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
reason="AutoML is not supported in the region yet.",
)
def test_auto_ml_describe_auto_ml_job(sagemaker_session):
def test_auto_ml_describe_auto_ml_job(sagemaker_session, test_session_job_name):
expected_default_input_config = [
{
"DataSource": {
"S3DataSource": {
"S3DataType": "S3Prefix",
"S3Uri": "s3://{}/{}/input/iris_training.csv".format(
sagemaker_session.default_bucket(), PREFIX
"S3Uri": "s3://{}/{}/{}/input/iris_training.csv".format(
sagemaker_session.default_bucket(), CUSTOM_S3_OBJECT_KEY_PREFIX, PREFIX
),
}
},
Expand All @@ -209,16 +216,18 @@ def test_auto_ml_describe_auto_ml_job(sagemaker_session):
}
]
expected_default_output_config = {
"S3OutputPath": "s3://{}/".format(sagemaker_session.default_bucket())
"S3OutputPath": "s3://{}/{}/".format(
sagemaker_session.default_bucket(), CUSTOM_S3_OBJECT_KEY_PREFIX
)
}

auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)
auto_ml = AutoML(
role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
)

desc = auto_ml.describe_auto_ml_job(job_name=AUTO_ML_JOB_NAME)
assert desc["AutoMLJobName"] == AUTO_ML_JOB_NAME
desc = auto_ml.describe_auto_ml_job(job_name=test_session_job_name)
assert desc["AutoMLJobName"] == test_session_job_name
assert desc["AutoMLJobStatus"] == "Completed"
assert isinstance(desc["BestCandidate"], dict)
assert desc["InputDataConfig"] == expected_default_input_config
Expand All @@ -230,14 +239,14 @@ def test_auto_ml_describe_auto_ml_job(sagemaker_session):
tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
reason="AutoML is not supported in the region yet.",
)
def test_auto_ml_attach(sagemaker_session):
def test_auto_ml_attach(sagemaker_session, test_session_job_name):
expected_default_input_config = [
{
"DataSource": {
"S3DataSource": {
"S3DataType": "S3Prefix",
"S3Uri": "s3://{}/{}/input/iris_training.csv".format(
sagemaker_session.default_bucket(), PREFIX
"S3Uri": "s3://{}/{}/{}/input/iris_training.csv".format(
sagemaker_session.default_bucket(), CUSTOM_S3_OBJECT_KEY_PREFIX, PREFIX
),
}
},
Expand All @@ -247,16 +256,18 @@ def test_auto_ml_attach(sagemaker_session):
}
]
expected_default_output_config = {
"S3OutputPath": "s3://{}/".format(sagemaker_session.default_bucket())
"S3OutputPath": "s3://{}/{}/".format(
sagemaker_session.default_bucket(), CUSTOM_S3_OBJECT_KEY_PREFIX
)
}

auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)

attached_automl_job = AutoML.attach(
auto_ml_job_name=AUTO_ML_JOB_NAME, sagemaker_session=sagemaker_session
auto_ml_job_name=test_session_job_name, sagemaker_session=sagemaker_session
)
attached_desc = attached_automl_job.describe_auto_ml_job()
assert attached_desc["AutoMLJobName"] == AUTO_ML_JOB_NAME
assert attached_desc["AutoMLJobName"] == test_session_job_name
assert attached_desc["AutoMLJobStatus"] == "Completed"
assert isinstance(attached_desc["BestCandidate"], dict)
assert attached_desc["InputDataConfig"] == expected_default_input_config
Expand All @@ -268,28 +279,28 @@ def test_auto_ml_attach(sagemaker_session):
tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
reason="AutoML is not supported in the region yet.",
)
def test_list_candidates(sagemaker_session):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
def test_list_candidates(sagemaker_session, test_session_job_name):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)

auto_ml = AutoML(
role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
)

candidates = auto_ml.list_candidates(job_name=AUTO_ML_JOB_NAME)
candidates = auto_ml.list_candidates(job_name=test_session_job_name)
assert len(candidates) == 3


@pytest.mark.skipif(
tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
reason="AutoML is not supported in the region yet.",
)
def test_best_candidate(sagemaker_session):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
def test_best_candidate(sagemaker_session, test_session_job_name):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)

auto_ml = AutoML(
role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
)
best_candidate = auto_ml.best_candidate(job_name=AUTO_ML_JOB_NAME)
best_candidate = auto_ml.best_candidate(job_name=test_session_job_name)
assert len(best_candidate["InferenceContainers"]) == 3
assert len(best_candidate["CandidateSteps"]) == 4
assert best_candidate["CandidateStatus"] == "Completed"
Expand All @@ -300,13 +311,13 @@ def test_best_candidate(sagemaker_session):
reason="AutoML is not supported in the region yet.",
)
@pytest.mark.release
def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
def test_deploy_best_candidate(sagemaker_session, cpu_instance_type, test_session_job_name):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)

auto_ml = AutoML(
role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
)
best_candidate = auto_ml.best_candidate(job_name=AUTO_ML_JOB_NAME)
best_candidate = auto_ml.best_candidate(job_name=test_session_job_name)
endpoint_name = unique_name_from_base("sagemaker-auto-ml-best-candidate-test")

with timeout(minutes=AUTO_ML_DEFAULT_TIMEMOUT_MINUTES):
Expand All @@ -331,14 +342,16 @@ def test_deploy_best_candidate(sagemaker_session, cpu_instance_type):
@pytest.mark.skip(
reason="",
)
def test_candidate_estimator_default_rerun_and_deploy(sagemaker_session, cpu_instance_type):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
def test_candidate_estimator_default_rerun_and_deploy(
sagemaker_session, cpu_instance_type, test_session_job_name
):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)

auto_ml = AutoML(
role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
)

candidates = auto_ml.list_candidates(job_name=AUTO_ML_JOB_NAME)
candidates = auto_ml.list_candidates(job_name=test_session_job_name)
candidate = candidates[1]

candidate_estimator = CandidateEstimator(candidate, sagemaker_session)
Expand All @@ -364,13 +377,13 @@ def test_candidate_estimator_default_rerun_and_deploy(sagemaker_session, cpu_ins
tests.integ.test_region() in tests.integ.NO_AUTO_ML_REGIONS,
reason="AutoML is not supported in the region yet.",
)
def test_candidate_estimator_get_steps(sagemaker_session):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session)
def test_candidate_estimator_get_steps(sagemaker_session, test_session_job_name):
auto_ml_utils.create_auto_ml_job_if_not_exist(sagemaker_session, test_session_job_name)

auto_ml = AutoML(
role=ROLE, target_attribute_name=TARGET_ATTRIBUTE_NAME, sagemaker_session=sagemaker_session
)
candidates = auto_ml.list_candidates(job_name=AUTO_ML_JOB_NAME)
candidates = auto_ml.list_candidates(job_name=test_session_job_name)
candidate = candidates[1]

candidate_estimator = CandidateEstimator(candidate, sagemaker_session)
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ markers =
canary_quick
cron
local_mode
slow_test
release
timeout: mark a test as a timeout.

[testenv]
Expand Down