Skip to content

Revert "fix: Prevent passing PipelineVariable object into image_uris.retrieve (#3054)" #3121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions src/sagemaker/image_uris.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from sagemaker.jumpstart.utils import is_jumpstart_model_input
from sagemaker.spark import defaults
from sagemaker.jumpstart import artifacts
from sagemaker.workflow import is_pipeline_variable

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -105,17 +104,11 @@ def retrieve(

Raises:
NotImplementedError: If the scope is not supported.
ValueError: If the combination of arguments specified is not supported or
any PipelineVariable object is passed in.
ValueError: If the combination of arguments specified is not supported.
VulnerableJumpStartModelError: If any of the dependencies required by the script have
known security vulnerabilities.
DeprecatedJumpStartModelError: If the version of the model is deprecated.
"""
args = dict(locals())
for name, val in args.items():
if is_pipeline_variable(val):
raise ValueError("%s should not be a pipeline variable (%s)" % (name, type(val)))

if is_jumpstart_model_input(model_id, model_version):
return artifacts._retrieve_image_uri(
model_id,
Expand Down
6 changes: 1 addition & 5 deletions src/sagemaker/workflow/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,7 @@ def __add__(self, other: Union[Expression, PrimitiveType]):

def __str__(self):
"""Override built-in String function for PipelineVariable"""
raise TypeError(
"Pipeline variables do not support __str__ operation. "
"Please use `.to_string()` to convert it to string type in execution time"
"or use `.expr` to translate it to Json for display purpose in Python SDK."
)
raise TypeError("Pipeline variables do not support __str__ operation.")

def __int__(self):
"""Override built-in Integer function for PipelineVariable"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,10 @@ def test_conditional_pytorch_training_model_registration(
inputs = TrainingInput(s3_data=input_path)

instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)
in_condition_input = ParameterString(name="Foo", default_value="Foo")

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
Expand Down Expand Up @@ -155,6 +153,7 @@ def test_conditional_pytorch_training_model_registration(
in_condition_input,
good_enough_input,
instance_count,
instance_type,
],
steps=[step_train, step_cond],
sagemaker_session=sagemaker_session,
Expand Down Expand Up @@ -260,10 +259,8 @@ def test_sklearn_xgboost_sip_model_registration(
prefix = "sip"
bucket_name = sagemaker_session.default_bucket()
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

# The instance_type should not be a pipeline variable
# since it is used to retrieve image_uri in compile time (PySDK)
sklearn_processor = SKLearnProcessor(
role=role,
instance_type=instance_type,
Expand Down Expand Up @@ -334,8 +331,6 @@ def test_sklearn_xgboost_sip_model_registration(
source_dir = base_dir
code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
estimator = XGBoost(
entry_point=entry_point,
source_dir=source_dir,
Expand Down Expand Up @@ -421,6 +416,7 @@ def test_sklearn_xgboost_sip_model_registration(
train_data_path_param,
val_data_path_param,
model_path_param,
instance_type,
instance_count,
output_path_param,
],
Expand Down Expand Up @@ -466,7 +462,7 @@ def test_model_registration_with_drift_check_baselines(
pipeline_name,
):
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

# upload model data to s3
model_local_path = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
Expand Down Expand Up @@ -554,9 +550,6 @@ def test_model_registration_with_drift_check_baselines(
),
)
customer_metadata_properties = {"key1": "value1"}

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
estimator = XGBoost(
entry_point="training.py",
source_dir=os.path.join(DATA_DIR, "sip"),
Expand Down Expand Up @@ -586,6 +579,7 @@ def test_model_registration_with_drift_check_baselines(
parameters=[
model_uri_param,
metrics_uri_param,
instance_type,
instance_count,
],
steps=[step_register],
Expand Down Expand Up @@ -673,11 +667,9 @@ def test_model_registration_with_model_repack(
inputs = TrainingInput(s3_data=input_path)

instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
Expand Down Expand Up @@ -732,7 +724,7 @@ def test_model_registration_with_model_repack(

pipeline = Pipeline(
name=pipeline_name,
parameters=[good_enough_input, instance_count],
parameters=[good_enough_input, instance_count, instance_type],
steps=[step_cond],
sagemaker_session=sagemaker_session,
)
Expand Down Expand Up @@ -775,10 +767,8 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
inputs = TrainingInput(s3_data=input_path)

instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
tensorflow_estimator = TensorFlow(
entry_point=entry_point,
role=role,
Expand Down Expand Up @@ -819,7 +809,10 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(

pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count],
parameters=[
instance_count,
instance_type,
],
steps=[step_train, step_register_model],
sagemaker_session=sagemaker_session,
)
Expand Down
30 changes: 15 additions & 15 deletions tests/integ/sagemaker/workflow/test_model_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,15 @@ def test_pytorch_training_model_registration_and_creation_without_custom_inferen
inputs = TrainingInput(s3_data=input_path)

instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
framework_version="1.5.0",
py_version="py3",
instance_count=instance_count,
instance_type="ml.m5.xlarge",
instance_type=instance_type,
sagemaker_session=pipeline_session,
)
train_step_args = pytorch_estimator.fit(inputs=inputs)
Expand Down Expand Up @@ -141,7 +140,7 @@ def test_pytorch_training_model_registration_and_creation_without_custom_inferen
)
pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count],
parameters=[instance_count, instance_type],
steps=[step_train, step_model_regis, step_model_create, step_fail],
sagemaker_session=pipeline_session,
)
Expand Down Expand Up @@ -204,16 +203,15 @@ def test_pytorch_training_model_registration_and_creation_with_custom_inference(
inputs = TrainingInput(s3_data=input_path)

instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
framework_version="1.5.0",
py_version="py3",
instance_count=instance_count,
instance_type="ml.m5.xlarge",
instance_type=instance_type,
sagemaker_session=pipeline_session,
output_kms_key=kms_key,
)
Expand Down Expand Up @@ -269,7 +267,7 @@ def test_pytorch_training_model_registration_and_creation_with_custom_inference(
)
pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count],
parameters=[instance_count, instance_type],
steps=[step_train, step_model_regis, step_model_create, step_fail],
sagemaker_session=pipeline_session,
)
Expand Down Expand Up @@ -402,6 +400,7 @@ def test_model_registration_with_drift_check_baselines_and_model_metrics(
pipeline_name,
):
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

# upload model data to s3
model_local_path = os.path.join(DATA_DIR, "mxnet_mnist/model.tar.gz")
Expand Down Expand Up @@ -489,12 +488,10 @@ def test_model_registration_with_drift_check_baselines_and_model_metrics(
),
)
customer_metadata_properties = {"key1": "value1"}
# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
estimator = XGBoost(
entry_point="training.py",
source_dir=os.path.join(DATA_DIR, "sip"),
instance_type="ml.m5.xlarge",
instance_type=instance_type,
instance_count=instance_count,
framework_version="0.90-2",
sagemaker_session=pipeline_session,
Expand Down Expand Up @@ -527,6 +524,7 @@ def test_model_registration_with_drift_check_baselines_and_model_metrics(
parameters=[
model_uri_param,
metrics_uri_param,
instance_type,
instance_count,
],
steps=[step_model_register],
Expand Down Expand Up @@ -608,14 +606,13 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
)
inputs = TrainingInput(s3_data=input_path)
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
tensorflow_estimator = TensorFlow(
entry_point=entry_point,
role=role,
instance_count=instance_count,
instance_type="ml.m5.xlarge",
instance_type=instance_type,
framework_version=tf_full_version,
py_version=tf_full_py_version,
sagemaker_session=pipeline_session,
Expand Down Expand Up @@ -648,7 +645,10 @@ def test_model_registration_with_tensorflow_model_with_pipeline_model(
)
pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count],
parameters=[
instance_count,
instance_type,
],
steps=[step_train, step_register_model],
sagemaker_session=pipeline_session,
)
Expand Down
11 changes: 6 additions & 5 deletions tests/integ/sagemaker/workflow/test_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
_REGISTER_MODEL_NAME_BASE,
_CREATE_MODEL_NAME_BASE,
)
from sagemaker.workflow.parameters import ParameterInteger
from sagemaker.workflow.parameters import (
ParameterInteger,
ParameterString,
)
from sagemaker.pytorch.estimator import PyTorch
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.retry import (
Expand Down Expand Up @@ -182,11 +185,9 @@ def test_model_registration_with_model_repack(
inputs = TrainingInput(s3_data=input_path)

instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
good_enough_input = ParameterInteger(name="GoodEnoughInput", default_value=1)

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
Expand Down Expand Up @@ -251,7 +252,7 @@ def test_model_registration_with_model_repack(
)
pipeline = Pipeline(
name=pipeline_name,
parameters=[good_enough_input, instance_count],
parameters=[good_enough_input, instance_count, instance_type],
steps=[step_train, step_cond],
sagemaker_session=pipeline_session,
)
Expand Down
6 changes: 2 additions & 4 deletions tests/integ/sagemaker/workflow/test_training_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_training_job_with_debugger_and_profiler(
pytorch_training_latest_py_version,
):
instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

rules = [
Rule.sagemaker(rule_configs.vanishing_gradient()),
Expand All @@ -78,8 +78,6 @@ def test_training_job_with_debugger_and_profiler(
)
inputs = TrainingInput(s3_data=input_path)

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
pytorch_estimator = PyTorch(
entry_point=script_path,
role="SageMakerRole",
Expand All @@ -100,7 +98,7 @@ def test_training_job_with_debugger_and_profiler(

pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count],
parameters=[instance_count, instance_type],
steps=[step_train],
sagemaker_session=sagemaker_session,
)
Expand Down
14 changes: 4 additions & 10 deletions tests/integ/sagemaker/workflow/test_tuning_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,8 @@ def test_tuning_single_algo(
inputs = TrainingInput(s3_data=input_path)

instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
Expand Down Expand Up @@ -169,7 +167,7 @@ def test_tuning_single_algo(

pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count, min_batch_size, max_batch_size],
parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
steps=[step_tune, step_best_model, step_second_best_model],
sagemaker_session=pipeline_session,
)
Expand Down Expand Up @@ -223,12 +221,10 @@ def test_tuning_multi_algos(
)

instance_count = ParameterInteger(name="InstanceCount", default_value=1)
instance_type = "ml.m5.xlarge"
instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"

# The instance_type should not be a pipeline variable
# since it is used to retrieve image_uri in compile time (PySDK)
sklearn_processor = SKLearnProcessor(
framework_version="0.20.0",
instance_type=instance_type,
Expand Down Expand Up @@ -263,8 +259,6 @@ def test_tuning_multi_algos(
json_get_hp = JsonGet(
step_name=step_process.name, property_file=property_file, json_path="train_size"
)
# If image_uri is not provided, the instance_type should not be a pipeline variable
# since instance_type is used to retrieve image_uri in compile time (PySDK)
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
Expand Down Expand Up @@ -313,7 +307,7 @@ def test_tuning_multi_algos(

pipeline = Pipeline(
name=pipeline_name,
parameters=[instance_count, min_batch_size, max_batch_size, static_hp_1],
parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
steps=[step_process, step_tune],
sagemaker_session=sagemaker_session,
)
Expand Down
Loading