fix: Prevent passing PipelineVariable object into image_uris.retrieve

Dewen Qi · Dewen Qi · commit d972439e0c0a · 2022-04-26T17:58:00.000-07:00
diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py
@@ -23,6 +23,7 @@
 from sagemaker.jumpstart.utils import is_jumpstart_model_input
 from sagemaker.spark import defaults
 from sagemaker.jumpstart import artifacts
+from sagemaker.workflow import is_pipeline_variable
 
 logger = logging.getLogger(__name__)
 
@@ -104,11 +105,17 @@ def retrieve(
 
     Raises:
         NotImplementedError: If the scope is not supported.
-        ValueError: If the combination of arguments specified is not supported.
+        ValueError: If the combination of arguments specified is not supported or
+            any PipelineVariable object is passed in.
         VulnerableJumpStartModelError: If any of the dependencies required by the script have
             known security vulnerabilities.
         DeprecatedJumpStartModelError: If the version of the model is deprecated.
     """
+    args = dict(locals())
+    for name, val in args.items():
+        if is_pipeline_variable(val):
+            raise ValueError("%s should not be a pipeline variable (%s)" % (name, type(val)))
+
     if is_jumpstart_model_input(model_id, model_version):
         return artifacts._retrieve_image_uri(
             model_id,
diff --git a/src/sagemaker/workflow/entities.py b/src/sagemaker/workflow/entities.py
@@ -78,7 +78,11 @@ def __add__(self, other: Union[Expression, PrimitiveType]):
 
     def __str__(self):
         """Override built-in String function for PipelineVariable"""
-        raise TypeError("Pipeline variables do not support __str__ operation.")
+        raise TypeError(
+            "Pipeline variables do not support __str__ operation. "
+            "Please use `.to_string()` to convert it to string type in execution time "
+            "or use `.expr` to translate it to Json for display purpose in Python SDK."
+        )
 
     def __int__(self):
         """Override built-in Integer function for PipelineVariable"""
diff --git a/tests/integ/sagemaker/workflow/test_model_registration.py b/tests/integ/sagemaker/workflow/test_model_registration.py
@@ -84,10 +84,12 @@ def test_conditional_pytorch_training_model_registration(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
     good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)
     in_condition_input = ParameterString(name="Foo", default_value="Foo")
 
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -146,7 +148,6 @@ def test_conditional_pytorch_training_model_registration(
             in_condition_input,
             good_enough_input,
             instance_count,
-            instance_type,
         ],
         steps=[step_cond],
         sagemaker_session=sagemaker_session,
@@ -252,8 +253,10 @@ def test_sklearn_xgboost_sip_model_registration(
     prefix = "sip"
     bucket_name = sagemaker_session.default_bucket()
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
 
+    # The instance_type should not be a pipeline variable
+    # since it is used to retrieve image_uri in compile time (PySDK)
     sklearn_processor = SKLearnProcessor(
         role=role,
         instance_type=instance_type,
@@ -324,6 +327,8 @@ def test_sklearn_xgboost_sip_model_registration(
     source_dir = base_dir
     code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)
 
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     estimator = XGBoost(
         entry_point=entry_point,
         source_dir=source_dir,
@@ -409,7 +414,6 @@ def test_sklearn_xgboost_sip_model_registration(
             train_data_path_param,
             val_data_path_param,
             model_path_param,
-            instance_type,
             instance_count,
             output_path_param,
         ],
@@ -455,7 +459,7 @@ def test_model_registration_with_drift_check_baselines(
     pipeline_name,
 ):
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
 
     # upload model data to s3
     model_local_path = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
@@ -543,6 +547,9 @@ def test_model_registration_with_drift_check_baselines(
         ),
     )
     customer_metadata_properties = {"key1": "value1"}
+
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     estimator = XGBoost(
         entry_point="training.py",
         source_dir=os.path.join(DATA_DIR, "sip"),
@@ -572,7 +579,6 @@ def test_model_registration_with_drift_check_baselines(
         parameters=[
             model_uri_param,
             metrics_uri_param,
-            instance_type,
             instance_count,
         ],
         steps=[step_register],
@@ -660,9 +666,11 @@ def test_model_registration_with_model_repack(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
     good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)
 
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -717,7 +725,7 @@ def test_model_registration_with_model_repack(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[good_enough_input, instance_count, instance_type],
+        parameters=[good_enough_input, instance_count],
         steps=[step_cond],
         sagemaker_session=sagemaker_session,
     )
diff --git a/tests/integ/sagemaker/workflow/test_retry.py b/tests/integ/sagemaker/workflow/test_retry.py
@@ -26,10 +26,7 @@
     DatasetDefinition,
     AthenaDatasetDefinition,
 )
-from sagemaker.workflow.parameters import (
-    ParameterInteger,
-    ParameterString,
-)
+from sagemaker.workflow.parameters import ParameterInteger
 from sagemaker.pytorch.estimator import PyTorch
 from sagemaker.workflow.pipeline import Pipeline
 from sagemaker.workflow.retry import (
@@ -183,9 +180,11 @@ def test_model_registration_with_model_repack(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
     good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)
 
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -247,7 +246,7 @@ def test_model_registration_with_model_repack(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[good_enough_input, instance_count, instance_type],
+        parameters=[good_enough_input, instance_count],
         steps=[step_cond],
         sagemaker_session=sagemaker_session,
     )
diff --git a/tests/integ/sagemaker/workflow/test_training_steps.py b/tests/integ/sagemaker/workflow/test_training_steps.py
@@ -59,7 +59,7 @@ def test_training_job_with_debugger_and_profiler(
     pytorch_training_latest_py_version,
 ):
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
 
     rules = [
         Rule.sagemaker(rule_configs.vanishing_gradient()),
@@ -78,6 +78,8 @@ def test_training_job_with_debugger_and_profiler(
     )
     inputs = TrainingInput(s3_data=input_path)
 
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=script_path,
         role="SageMakerRole",
@@ -98,7 +100,7 @@ def test_training_job_with_debugger_and_profiler(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count, instance_type],
+        parameters=[instance_count],
         steps=[step_train],
         sagemaker_session=sagemaker_session,
     )
diff --git a/tests/integ/sagemaker/workflow/test_tuning_steps.py b/tests/integ/sagemaker/workflow/test_tuning_steps.py
@@ -93,8 +93,10 @@ def test_tuning_single_algo(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
 
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -168,7 +170,7 @@ def test_tuning_single_algo(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
+        parameters=[instance_count, min_batch_size, max_batch_size],
         steps=[step_tune, step_best_model, step_second_best_model],
         sagemaker_session=sagemaker_session,
     )
@@ -225,10 +227,12 @@ def test_tuning_multi_algos(
     )
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
 
     input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"
 
+    # The instance_type should not be a pipeline variable
+    # since it is used to retrieve image_uri in compile time (PySDK)
     sklearn_processor = SKLearnProcessor(
         framework_version="0.20.0",
         instance_type=instance_type,
@@ -263,6 +267,8 @@ def test_tuning_multi_algos(
     json_get_hp = JsonGet(
         step_name=step_process.name, property_file=property_file, json_path="train_size"
     )
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -311,7 +317,7 @@ def test_tuning_multi_algos(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
+        parameters=[instance_count, min_batch_size, max_batch_size],
         steps=[step_process, step_tune],
         sagemaker_session=sagemaker_session,
     )
diff --git a/tests/integ/sagemaker/workflow/test_workflow.py b/tests/integ/sagemaker/workflow/test_workflow.py
@@ -157,12 +157,14 @@ def test_three_step_definition(
     athena_dataset_definition,
 ):
     framework_version = "0.20.0"
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
     output_prefix = ParameterString(name="OutputPrefix", default_value="output")
 
     input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"
 
+    # The instance_type should not be a pipeline variable
+    # since it is used to retrieve image_uri in compile time (PySDK)
     sklearn_processor = SKLearnProcessor(
         framework_version=framework_version,
         instance_type=instance_type,
@@ -200,6 +202,8 @@ def test_three_step_definition(
         code=os.path.join(script_dir, "preprocessing.py"),
     )
 
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     sklearn_train = SKLearn(
         framework_version=framework_version,
         entry_point=os.path.join(script_dir, "train.py"),
@@ -239,7 +243,7 @@ def test_three_step_definition(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_type, instance_count, output_prefix],
+        parameters=[instance_count, output_prefix],
         steps=[step_process, step_train, step_model],
         sagemaker_session=sagemaker_session,
     )
@@ -340,10 +344,12 @@ def test_steps_with_map_params_pipeline(
 ):
     instance_count = ParameterInteger(name="InstanceCount", default_value=2)
     framework_version = "0.20.0"
-    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
+    instance_type = "ml.m5.xlarge"
     output_prefix = ParameterString(name="OutputPrefix", default_value="output")
     input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"
 
+    # The instance_type should not be a pipeline variable
+    # since it is used to retrieve image_uri in compile time (PySDK)
     sklearn_processor = SKLearnProcessor(
         framework_version=framework_version,
         instance_type=instance_type,
@@ -381,6 +387,8 @@ def test_steps_with_map_params_pipeline(
         code=os.path.join(script_dir, "preprocessing.py"),
     )
 
+    # If image_uri is not provided, the instance_type should not be a pipeline variable
+    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     sklearn_train = SKLearn(
         framework_version=framework_version,
         entry_point=os.path.join(script_dir, "train.py"),
@@ -437,7 +445,7 @@ def test_steps_with_map_params_pipeline(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_type, instance_count, output_prefix],
+        parameters=[instance_count, output_prefix],
         steps=[step_process, step_train, step_cond],
         sagemaker_session=sagemaker_session,
     )
diff --git a/tests/unit/sagemaker/image_uris/test_retrieve.py b/tests/unit/sagemaker/image_uris/test_retrieve.py
@@ -19,6 +19,7 @@
 from mock import patch
 
 from sagemaker import image_uris
+from sagemaker.workflow.parameters import ParameterString
 
 BASE_CONFIG = {
     "processors": ["cpu", "gpu"],
@@ -717,3 +718,19 @@ def test_retrieve_huggingface(config_for_framework):
         "564829616587.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-training:"
         "1.6.0-transformers4.3.1-gpu-py37-cu110-ubuntu18.04" == pt_new_version
     )
+
+
+def test_retrieve_with_pipeline_variable():
+    with pytest.raises(ValueError) as error:  # retrieve() documents ValueError for pipeline variables
+        image_uris.retrieve(
+            framework="tensorflow",
+            version="1.15",
+            py_version="py3",
+            instance_type=ParameterString(
+                name="TrainingInstanceType",
+                default_value="ml.m5.xlarge",
+            ),
+            region="us-east-1",
+            image_scope="training",
+        )
+    assert "instance_type should not be a pipeline variable" in str(error.value)
diff --git a/tests/unit/sagemaker/workflow/test_execution_variables.py b/tests/unit/sagemaker/workflow/test_execution_variables.py
@@ -33,7 +33,7 @@ def test_implicit_value():
 
     with pytest.raises(TypeError) as error:
         str(var)
-    assert str(error.value) == "Pipeline variables do not support __str__ operation."
+    assert "Pipeline variables do not support __str__ operation." in str(error.value)
 
     with pytest.raises(TypeError) as error:
         int(var)
diff --git a/tests/unit/sagemaker/workflow/test_functions.py b/tests/unit/sagemaker/workflow/test_functions.py
@@ -81,7 +81,7 @@ def test_implicit_value_on_join():
 
     with pytest.raises(TypeError) as error:
         str(func)
-    assert str(error.value) == "Pipeline variables do not support __str__ operation."
+    assert "Pipeline variables do not support __str__ operation." in str(error.value)
 
     with pytest.raises(TypeError) as error:
         int(func)
@@ -189,7 +189,7 @@ def test_implicit_value_on_json_get():
 
     with pytest.raises(TypeError) as error:
         str(func)
-    assert str(error.value) == "Pipeline variables do not support __str__ operation."
+    assert "Pipeline variables do not support __str__ operation." in str(error.value)
 
     with pytest.raises(TypeError) as error:
         int(func)
diff --git a/tests/unit/sagemaker/workflow/test_parameters.py b/tests/unit/sagemaker/workflow/test_parameters.py
@@ -76,7 +76,7 @@ def test_parameter_to_string_and_string_implicit_value():
     with pytest.raises(TypeError) as error:
         str(param)
 
-    assert str(error.value) == "Pipeline variables do not support __str__ operation."
+    assert "Pipeline variables do not support __str__ operation." in str(error.value)
 
 
 def test_parameter_integer_implicit_value():
diff --git a/tests/unit/sagemaker/workflow/test_properties.py b/tests/unit/sagemaker/workflow/test_properties.py
@@ -111,7 +111,7 @@ def test_implicit_value():
 
     with pytest.raises(TypeError) as error:
         str(prop.CreationTime)
-    assert str(error.value) == "Pipeline variables do not support __str__ operation."
+    assert "Pipeline variables do not support __str__ operation." in str(error.value)
 
     with pytest.raises(TypeError) as error:
         int(prop.CreationTime)