aws · shreyapandit · May 19, 2022 · May 19, 2022
@@ -23,7 +23,6 @@
 from sagemaker.jumpstart.utils import is_jumpstart_model_input
 from sagemaker.spark import defaults
 from sagemaker.jumpstart import artifacts
-from sagemaker.workflow import is_pipeline_variable
 
 logger = logging.getLogger(__name__)
 
@@ -105,17 +104,11 @@ def retrieve(
 
     Raises:
         NotImplementedError: If the scope is not supported.
-        ValueError: If the combination of arguments specified is not supported or
-            any PipelineVariable object is passed in.
+        ValueError: If the combination of arguments specified is not supported.
         VulnerableJumpStartModelError: If any of the dependencies required by the script have
             known security vulnerabilities.
         DeprecatedJumpStartModelError: If the version of the model is deprecated.
     """
-    args = dict(locals())
-    for name, val in args.items():
-        if is_pipeline_variable(val):
-            raise ValueError("%s should not be a pipeline variable (%s)" % (name, type(val)))
-
     if is_jumpstart_model_input(model_id, model_version):
         return artifacts._retrieve_image_uri(
             model_id,

@@ -78,11 +78,7 @@ def __add__(self, other: Union[Expression, PrimitiveType]):
 
     def __str__(self):
         """Override built-in String function for PipelineVariable"""
-        raise TypeError(
-            "Pipeline variables do not support __str__ operation. "
-            "Please use `.to_string()` to convert it to string type in execution time"
-            "or use `.expr` to translate it to Json for display purpose in Python SDK."
-        )
+        raise TypeError("Pipeline variables do not support __str__ operation.")
 
     def __int__(self):
         """Override built-in Integer function for PipelineVariable"""

@@ -90,12 +90,10 @@ def test_conditional_pytorch_training_model_registration(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
     good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)
     in_condition_input = ParameterString(name="Foo", default_value="Foo")
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -155,6 +153,7 @@ def test_conditional_pytorch_training_model_registration(
             in_condition_input,
             good_enough_input,
             instance_count,
+            instance_type,
         ],
         steps=[step_train, step_cond],
         sagemaker_session=sagemaker_session,
@@ -260,10 +259,8 @@ def test_sklearn_xgboost_sip_model_registration(
     prefix = "sip"
     bucket_name = sagemaker_session.default_bucket()
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
-    # The instance_type should not be a pipeline variable
-    # since it is used to retrieve image_uri in compile time (PySDK)
     sklearn_processor = SKLearnProcessor(
         role=role,
         instance_type=instance_type,
@@ -334,8 +331,6 @@ def test_sklearn_xgboost_sip_model_registration(
     source_dir = base_dir
     code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     estimator = XGBoost(
         entry_point=entry_point,
         source_dir=source_dir,
@@ -421,6 +416,7 @@ def test_sklearn_xgboost_sip_model_registration(
             train_data_path_param,
             val_data_path_param,
             model_path_param,
+            instance_type,
             instance_count,
             output_path_param,
         ],
@@ -466,7 +462,7 @@ def test_model_registration_with_drift_check_baselines(
     pipeline_name,
 ):
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
     # upload model data to s3
     model_local_path = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
@@ -554,9 +550,6 @@ def test_model_registration_with_drift_check_baselines(
         ),
     )
     customer_metadata_properties = {"key1": "value1"}
-
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     estimator = XGBoost(
         entry_point="training.py",
         source_dir=os.path.join(DATA_DIR, "sip"),
@@ -586,6 +579,7 @@ def test_model_registration_with_drift_check_baselines(
         parameters=[
             model_uri_param,
             metrics_uri_param,
+            instance_type,
             instance_count,
         ],
         steps=[step_register],
@@ -673,11 +667,9 @@ def test_model_registration_with_model_repack(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
     good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -732,7 +724,7 @@ def test_model_registration_with_model_repack(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[good_enough_input, instance_count],
+        parameters=[good_enough_input, instance_count, instance_type],
         steps=[step_cond],
         sagemaker_session=sagemaker_session,
     )
@@ -775,10 +767,8 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     tensorflow_estimator = TensorFlow(
         entry_point=entry_point,
         role=role,
@@ -819,7 +809,10 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count],
+        parameters=[
+            instance_count,
+            instance_type,
+        ],
         steps=[step_train, step_register_model],
         sagemaker_session=sagemaker_session,
     )

@@ -83,16 +83,15 @@ def test_pytorch_training_model_registration_and_creation_without_custom_inferen
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
         framework_version="1.5.0",
         py_version="py3",
         instance_count=instance_count,
-        instance_type="ml.m5.xlarge",
+        instance_type=instance_type,
         sagemaker_session=pipeline_session,
     )
     train_step_args = pytorch_estimator.fit(inputs=inputs)
@@ -141,7 +140,7 @@ def test_pytorch_training_model_registration_and_creation_without_custom_inferen
     )
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count],
+        parameters=[instance_count, instance_type],
         steps=[step_train, step_model_regis, step_model_create, step_fail],
         sagemaker_session=pipeline_session,
     )
@@ -204,16 +203,15 @@ def test_pytorch_training_model_registration_and_creation_with_custom_inference(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
         framework_version="1.5.0",
         py_version="py3",
         instance_count=instance_count,
-        instance_type="ml.m5.xlarge",
+        instance_type=instance_type,
         sagemaker_session=pipeline_session,
         output_kms_key=kms_key,
     )
@@ -269,7 +267,7 @@ def test_pytorch_training_model_registration_and_creation_with_custom_inference(
     )
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count],
+        parameters=[instance_count, instance_type],
         steps=[step_train, step_model_regis, step_model_create, step_fail],
         sagemaker_session=pipeline_session,
     )
@@ -402,6 +400,7 @@ def test_model_registration_with_drift_check_baselines_and_model_metrics(
     pipeline_name,
 ):
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
     # upload model data to s3
     model_local_path = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
@@ -489,12 +488,10 @@ def test_model_registration_with_drift_check_baselines_and_model_metrics(
         ),
     )
     customer_metadata_properties = {"key1": "value1"}
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     estimator = XGBoost(
         entry_point="training.py",
         source_dir=os.path.join(DATA_DIR, "sip"),
-        instance_type="ml.m5.xlarge",
+        instance_type=instance_type,
         instance_count=instance_count,
         framework_version="0.90-2",
         sagemaker_session=pipeline_session,
@@ -527,6 +524,7 @@ def test_model_registration_with_drift_check_baselines_and_model_metrics(
         parameters=[
             model_uri_param,
             metrics_uri_param,
+            instance_type,
             instance_count,
         ],
         steps=[step_model_register],
@@ -608,14 +606,13 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
     )
     inputs = TrainingInput(s3_data=input_path)
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     tensorflow_estimator = TensorFlow(
         entry_point=entry_point,
         role=role,
         instance_count=instance_count,
-        instance_type="ml.m5.xlarge",
+        instance_type=instance_type,
         framework_version=tf_full_version,
         py_version=tf_full_py_version,
         sagemaker_session=pipeline_session,
@@ -648,7 +645,10 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
     )
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count],
+        parameters=[
+            instance_count,
+            instance_type,
+        ],
         steps=[step_train, step_register_model],
         sagemaker_session=pipeline_session,
     )

@@ -31,7 +31,10 @@
     _REGISTER_MODEL_NAME_BASE,
     _CREATE_MODEL_NAME_BASE,
 )
-from sagemaker.workflow.parameters import ParameterInteger
+from sagemaker.workflow.parameters import (
+    ParameterInteger,
+    ParameterString,
+)
 from sagemaker.pytorch.estimator import PyTorch
 from sagemaker.workflow.pipeline import Pipeline
 from sagemaker.workflow.retry import (
@@ -182,11 +185,9 @@ def test_model_registration_with_model_repack(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
     good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -251,7 +252,7 @@ def test_model_registration_with_model_repack(
     )
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[good_enough_input, instance_count],
+        parameters=[good_enough_input, instance_count, instance_type],
         steps=[step_train, step_cond],
         sagemaker_session=pipeline_session,
     )

@@ -59,7 +59,7 @@ def test_training_job_with_debugger_and_profiler(
     pytorch_training_latest_py_version,
 ):
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
     rules = [
         Rule.sagemaker(rule_configs.vanishing_gradient()),
@@ -78,8 +78,6 @@ def test_training_job_with_debugger_and_profiler(
     )
     inputs = TrainingInput(s3_data=input_path)
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=script_path,
         role="SageMakerRole",
@@ -100,7 +98,7 @@ def test_training_job_with_debugger_and_profiler(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count],
+        parameters=[instance_count, instance_type],
         steps=[step_train],
         sagemaker_session=sagemaker_session,
     )

@@ -91,10 +91,8 @@ def test_tuning_single_algo(
     inputs = TrainingInput(s3_data=input_path)
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -169,7 +167,7 @@ def test_tuning_single_algo(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count, min_batch_size, max_batch_size],
+        parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
         steps=[step_tune, step_best_model, step_second_best_model],
         sagemaker_session=pipeline_session,
     )
@@ -223,12 +221,10 @@ def test_tuning_multi_algos(
     )
 
     instance_count = ParameterInteger(name="InstanceCount", default_value=1)
-    instance_type = "ml.m5.xlarge"
+    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
 
     input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"
 
-    # The instance_type should not be a pipeline variable
-    # since it is used to retrieve image_uri in compile time (PySDK)
     sklearn_processor = SKLearnProcessor(
         framework_version="0.20.0",
         instance_type=instance_type,
@@ -263,8 +259,6 @@ def test_tuning_multi_algos(
     json_get_hp = JsonGet(
         step_name=step_process.name, property_file=property_file, json_path="train_size"
     )
-    # If image_uri is not provided, the instance_type should not be a pipeline variable
-    # since instance_type is used to retrieve image_uri in compile time (PySDK)
     pytorch_estimator = PyTorch(
         entry_point=entry_point,
         role=role,
@@ -313,7 +307,7 @@ def test_tuning_multi_algos(
 
     pipeline = Pipeline(
         name=pipeline_name,
-        parameters=[instance_count, min_batch_size, max_batch_size, static_hp_1],
+        parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
         steps=[step_process, step_tune],
         sagemaker_session=sagemaker_session,
     )