diff --git a/src/sagemaker/workflow/_utils.py b/src/sagemaker/workflow/_utils.py
index 8ba65f1eee..cdef9537c1 100644
--- a/src/sagemaker/workflow/_utils.py
+++ b/src/sagemaker/workflow/_utils.py
@@ -13,6 +13,7 @@
 """Scrapper utilities to support repacking of models."""
 from __future__ import absolute_import

+import logging
 import os
 import shutil
 import tarfile
@@ -37,6 +38,8 @@
 if TYPE_CHECKING:
     from sagemaker.workflow.step_collections import StepCollection

+logger = logging.getLogger(__name__)
+
 FRAMEWORK_VERSION = "0.23-1"
 INSTANCE_TYPE = "ml.m5.large"
 REPACK_SCRIPT = "_repack_model.py"
@@ -479,10 +482,19 @@ def arguments(self) -> RequestType:

         request_dict = get_create_model_package_request(**model_package_args)
         # these are not available in the workflow service and will cause rejection
+        warn_msg_template = (
+            "Popping out '%s' from the pipeline definition "
+            "since it will be overridden at pipeline execution time."
+        )
         if "CertifyForMarketplace" in request_dict:
             request_dict.pop("CertifyForMarketplace")
+            logger.warning(warn_msg_template, "CertifyForMarketplace")
         if "Description" in request_dict:
             request_dict.pop("Description")
+            logger.warning(warn_msg_template, "Description")
+        if "ModelPackageName" in request_dict:
+            request_dict.pop("ModelPackageName")
+            logger.warning(warn_msg_template, "ModelPackageName")

         return request_dict

diff --git a/tests/integ/sagemaker/workflow/test_model_steps.py b/tests/integ/sagemaker/workflow/test_model_steps.py
index 31c518b100..f25723c440 100644
--- a/tests/integ/sagemaker/workflow/test_model_steps.py
+++ b/tests/integ/sagemaker/workflow/test_model_steps.py
@@ -112,6 +112,7 @@ def test_pytorch_training_model_registration_and_creation_without_custom_inferen
         inference_instances=["ml.m5.xlarge"],
         transform_instances=["ml.m5.xlarge"],
         description="test-description",
+        model_package_name="model-pkg-name-will-be-popped-out",
     )
     step_model_regis = ModelStep(
         name="pytorch-register-model",
diff --git a/tests/unit/sagemaker/workflow/conftest.py b/tests/unit/sagemaker/workflow/conftest.py
new file mode 100644
index 0000000000..9ea3d0bcac
--- /dev/null
+++ b/tests/unit/sagemaker/workflow/conftest.py
@@ -0,0 +1,75 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+from unittest.mock import Mock, PropertyMock
+
+import pytest
+
+from sagemaker import Session
+from sagemaker.workflow.pipeline_context import PipelineSession
+
+REGION = "us-west-2"
+BUCKET = "my-bucket"
+ROLE = "DummyRole"
+IMAGE_URI = "fakeimage"
+
+
+@pytest.fixture(scope="module")
+def client():
+    """Mock client.
+
+    Considerations when appropriate:
+
+    * utilize botocore.stub.Stubber
+    * separate runtime client from client
+    """
+    client_mock = Mock()
+    client_mock._client_config.user_agent = (
+        "Boto3/1.14.24 Python/3.8.5 Linux/5.4.0-42-generic Botocore/1.17.24 Resource"
+    )
+    return client_mock
+
+
+@pytest.fixture(scope="module")
+def boto_session(client):
+    role_mock = Mock()
+    type(role_mock).arn = PropertyMock(return_value=ROLE)
+
+    resource_mock = Mock()
+    resource_mock.Role.return_value = role_mock
+
+    session_mock = Mock(region_name=REGION)
+    session_mock.resource.return_value = resource_mock
+    session_mock.client.return_value = client
+
+    return session_mock
+
+
+@pytest.fixture(scope="module")
+def pipeline_session(boto_session, client):
+    return PipelineSession(
+        boto_session=boto_session,
+        sagemaker_client=client,
+        default_bucket=BUCKET,
+    )
+
+
+@pytest.fixture(scope="module")
+def sagemaker_session(boto_session, client):
+    return Session(
+        boto_session=boto_session,
+        sagemaker_client=client,
+        sagemaker_runtime_client=client,
+        default_bucket=BUCKET,
+    )
diff --git a/tests/unit/sagemaker/workflow/test_model_step.py b/tests/unit/sagemaker/workflow/test_model_step.py
index 080e70ca62..2216299d3b 100644
--- a/tests/unit/sagemaker/workflow/test_model_step.py
+++ b/tests/unit/sagemaker/workflow/test_model_step.py
@@ -15,7 +15,7 @@
 import json
 import os

-from mock import Mock, PropertyMock, patch
+from mock import patch

 import pytest

@@ -43,7 +43,6 @@
 )
 from sagemaker.workflow.parameters import ParameterString, ParameterInteger
 from sagemaker.workflow.pipeline import Pipeline, PipelineGraph
-from sagemaker.workflow.pipeline_context import PipelineSession
 from sagemaker.workflow.retry import (
     StepRetryPolicy,
     StepExceptionTypeEnum,
@@ -55,11 +54,9 @@
 from sagemaker.workflow.lambda_step import LambdaStep, LambdaOutput, LambdaOutputTypeEnum
 from tests.unit import DATA_DIR
 from tests.unit.sagemaker.workflow.helpers import CustomStep, ordered
+from tests.unit.sagemaker.workflow.conftest import BUCKET, ROLE

 _IMAGE_URI = "fakeimage"
-_REGION = "us-west-2"
-_BUCKET = "my-bucket"
-_ROLE = "DummyRole"
 _INSTANCE_TYPE = "ml.m4.xlarge"

 _SAGEMAKER_PROGRAM = SCRIPT_PARAM_NAME.upper()
@@ -69,60 +66,10 @@
 _XGBOOST_PATH = os.path.join(DATA_DIR, "xgboost_abalone")
 _TENSORFLOW_PATH = os.path.join(DATA_DIR, "tfs/tfs-test-entrypoint-and-dependencies")
 _REPACK_OUTPUT_KEY_PREFIX = "code-output"
-_MODEL_CODE_LOCATION = f"s3://{_BUCKET}/{_REPACK_OUTPUT_KEY_PREFIX}"
+_MODEL_CODE_LOCATION = f"s3://{BUCKET}/{_REPACK_OUTPUT_KEY_PREFIX}"
 _MODEL_CODE_LOCATION_TRAILING_SLASH = _MODEL_CODE_LOCATION + "/"


-@pytest.fixture
-def client():
-    """Mock client.
-
-    Considerations when appropriate:
-
-    * utilize botocore.stub.Stubber
-    * separate runtime client from client
-    """
-    client_mock = Mock()
-    client_mock._client_config.user_agent = (
-        "Boto3/1.14.24 Python/3.8.5 Linux/5.4.0-42-generic Botocore/1.17.24 Resource"
-    )
-    return client_mock
-
-
-@pytest.fixture
-def boto_session(client):
-    role_mock = Mock()
-    type(role_mock).arn = PropertyMock(return_value=_ROLE)
-
-    resource_mock = Mock()
-    resource_mock.Role.return_value = role_mock
-
-    session_mock = Mock(region_name=_REGION)
-    session_mock.resource.return_value = resource_mock
-    session_mock.client.return_value = client
-
-    return session_mock
-
-
-@pytest.fixture
-def pipeline_session(boto_session, client):
-    return PipelineSession(
-        boto_session=boto_session,
-        sagemaker_client=client,
-        default_bucket=_BUCKET,
-    )
-
-
-@pytest.fixture
-def sagemaker_session(boto_session, client):
-    return Session(
-        boto_session=boto_session,
-        sagemaker_client=client,
-        sagemaker_runtime_client=client,
-        default_bucket=_BUCKET,
-    )
-
-
 @pytest.fixture
 def model_data_param():
     return ParameterString(name="ModelData", default_value="s3://my-bucket/file")
@@ -137,7 +84,7 @@ def model(pipeline_session, model_data_param):
         sagemaker_session=pipeline_session,
         entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
         source_dir=f"{DATA_DIR}",
-        role=_ROLE,
+        role=ROLE,
     )


@@ -322,13 +269,13 @@ def test_create_pipeline_model_with_runtime_repack(pipeline_session, model_data_
     sparkml_model = SparkMLModel(
         name="MySparkMLModel",
         model_data=model_data_param,
-        role=_ROLE,
+        role=ROLE,
         sagemaker_session=pipeline_session,
         env={"SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT": "text/csv"},
     )
     # The model need to runtime repack
     ppl_model = PipelineModel(
-        models=[sparkml_model, model], role=_ROLE, sagemaker_session=pipeline_session
+        models=[sparkml_model, model], role=ROLE, sagemaker_session=pipeline_session
     )
     step_args = ppl_model.create(
         instance_type="c4.4xlarge",
@@ -417,7 +364,7 @@ def test_register_pipeline_model_with_runtime_repack(pipeline_session, model_dat
     # The model no need to runtime repack, since source_dir is missing
     sparkml_model = SparkMLModel(
         model_data=model_data_param,
-        role=_ROLE,
+        role=ROLE,
         sagemaker_session=pipeline_session,
         env={"SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT": "text/csv"},
         entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
@@ -429,11 +376,11 @@
         sagemaker_session=pipeline_session,
         entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
         source_dir=f"{DATA_DIR}",
-        role=_ROLE,
+        role=ROLE,
         env={"k": "v"},
     )
     model = PipelineModel(
-        models=[sparkml_model, model], role=_ROLE, sagemaker_session=pipeline_session
+        models=[sparkml_model, model], role=ROLE, sagemaker_session=pipeline_session
     )
     step_args = model.register(
         content_types=["text/csv"],
@@ -516,7 +463,7 @@ def test_register_model_without_repack(pipeline_session):
         model_data=model_data,
         entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
         sagemaker_session=pipeline_session,
-        role=_ROLE,
+        role=ROLE,
     )
     step_args = model.register(
         content_types=["text/csv"],
@@ -547,7 +494,7 @@ def test_register_model_without_repack(pipeline_session):
     assert containers[0]["Environment"][_SAGEMAKER_PROGRAM] == _SCRIPT_NAME
     assert (
         containers[0]["Environment"][_SAGEMAKER_SUBMIT_DIRECTORY]
-        == f"s3://{_BUCKET}/{model_name}/sourcedir.tar.gz"
+        == f"s3://{BUCKET}/{model_name}/sourcedir.tar.gz"
     )
     adjacency_list = PipelineGraph.from_pipeline(pipeline).adjacency_list
     assert ordered(adjacency_list) == ordered({"MyModelStep-RegisterModel": []})
@@ -560,11 +507,11 @@ def test_create_model_with_compile_time_repack(mock_repack, pipeline_session):
     model = Model(
         name=model_name,
         image_uri=_IMAGE_URI,
-        model_data=f"s3://{_BUCKET}/model.tar.gz",
+        model_data=f"s3://{BUCKET}/model.tar.gz",
         sagemaker_session=pipeline_session,
         entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
         source_dir=f"{DATA_DIR}",
-        role=_ROLE,
+        role=ROLE,
     )
     step_args = model.create(
         instance_type="c4.4xlarge",
@@ -582,7 +529,7 @@ def test_create_model_with_compile_time_repack(mock_repack, pipeline_session):
     arguments = step_dsl_list[0]["Arguments"]
     assert arguments["PrimaryContainer"]["Image"] == _IMAGE_URI
     assert (
-        arguments["PrimaryContainer"]["ModelDataUrl"] == f"s3://{_BUCKET}/{model_name}/model.tar.gz"
+        arguments["PrimaryContainer"]["ModelDataUrl"] == f"s3://{BUCKET}/{model_name}/model.tar.gz"
     )
     assert arguments["PrimaryContainer"]["Environment"][_SAGEMAKER_PROGRAM] == _SCRIPT_NAME
     assert arguments["PrimaryContainer"]["Environment"][_SAGEMAKER_SUBMIT_DIRECTORY] == _DIR_NAME
@@ -700,7 +647,7 @@ def test_conditional_model_create_and_regis(
                 model_data="dummy_model_data",
                 image_uri=_IMAGE_URI,
                 entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
-                role=_ROLE,
+                role=ROLE,
                 enable_network_isolation=True,
                 code_location=_MODEL_CODE_LOCATION_TRAILING_SLASH,
             ),
@@ -713,7 +660,7 @@ def test_conditional_model_create_and_regis(
                 framework_version="1.11.0",
                 image_uri=_IMAGE_URI,
                 entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
-                role=_ROLE,
+                role=ROLE,
                 enable_network_isolation=False,
             ),
             1,
@@ -724,7 +671,7 @@ def test_conditional_model_create_and_regis(
                 model_data="dummy_model_data",
                 image_uri=_IMAGE_URI,
                 entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
-                role=_ROLE,
+                role=ROLE,
                 framework_version="1.5.0",
                 code_location=_MODEL_CODE_LOCATION_TRAILING_SLASH,
             ),
@@ -736,7 +683,7 @@ def test_conditional_model_create_and_regis(
                 model_data="dummy_model_data",
                 image_uri=_IMAGE_URI,
                 entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
-                role=_ROLE,
+                role=ROLE,
                 framework_version="1.2.0",
             ),
             1,
@@ -747,7 +694,7 @@ def test_conditional_model_create_and_regis(
                 model_data="dummy_model_data",
                 image_uri=_IMAGE_URI,
                 entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
-                role=_ROLE,
+                role=ROLE,
             ),
             2,
         ),
@@ -757,7 +704,7 @@ def test_conditional_model_create_and_regis(
                 model_data="dummy_model_data",
                 image_uri=_IMAGE_URI,
                 entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
-                role=_ROLE,
+                role=ROLE,
                 code_location=_MODEL_CODE_LOCATION_TRAILING_SLASH,
             ),
             2,
@@ -768,7 +715,7 @@ def test_conditional_model_create_and_regis(
                 model_data="dummy_model_data",
                 image_uri=_IMAGE_URI,
                 entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
-                role=_ROLE,
+                role=ROLE,
             ),
             1,
         ),
@@ -789,7 +736,7 @@ def assert_test_result(steps: list):
             )
         else:
            assert steps[0]["Arguments"]["OutputDataConfig"]["S3OutputPath"] == (
-                f"s3://{_BUCKET}/{model.name}"
+                f"s3://{BUCKET}/{model.name}"
            )

     model, expected_step_num = test_input
@@ -828,7 +775,7 @@ def assert_test_result(steps: list):
             XGBoostModel(
                 model_data="dummy_model_step",
                 framework_version="1.3-1",
-                role=_ROLE,
+                role=ROLE,
                 entry_point=os.path.join(_XGBOOST_PATH, "inference.py"),
                 enable_network_isolation=True,
             ),
@@ -845,7 +792,7 @@ def assert_test_result(steps: list):
             XGBoostModel(
                 model_data="dummy_model_step",
                 framework_version="1.3-1",
-                role=_ROLE,
+                role=ROLE,
                 entry_point=os.path.join(_XGBOOST_PATH, "inference.py"),
             ),
             {
@@ -861,7 +808,7 @@ def assert_test_result(steps: list):
             XGBoostModel(
                 model_data="dummy_model_step",
                 framework_version="1.3-1",
-                role=_ROLE,
+                role=ROLE,
                 entry_point=None,
             ),
             {
@@ -876,9 +823,8 @@ def assert_test_result(steps: list):
         (
             TensorFlowModel(
                 model_data="dummy_model_step",
-                role=_ROLE,
+                role=ROLE,
                 image_uri=_IMAGE_URI,
-                sagemaker_session=pipeline_session,
                 entry_point=os.path.join(_TENSORFLOW_PATH, "inference.py"),
             ),
             {
@@ -893,9 +839,8 @@ def assert_test_result(steps: list):
         (
             TensorFlowModel(
                 model_data="dummy_model_step",
-                role=_ROLE,
+                role=ROLE,
                 image_uri=_IMAGE_URI,
-                sagemaker_session=pipeline_session,
             ),
             {
                 "expected_step_num": 1,
@@ -941,7 +886,7 @@ def test_request_compare_of_register_model_under_different_sessions(
     _verify_register_model_container_definition(regis_step_arg, expect, dict)

     # Get create model package request under Session
-    model.model_data = f"s3://{_BUCKET}"
+    model.model_data = f"s3://{BUCKET}"
     model.sagemaker_session = sagemaker_session
     with patch.object(
         Session, "_intercept_create_request", return_value=dict(ModelPackageArn="arn:aws")
@@ -996,7 +941,7 @@ def test_model_step_with_lambda_property_reference(pipeline_session):
         model_data=lambda_step.properties.Outputs["model_artifact"],
         sagemaker_session=pipeline_session,
         entry_point=f"{DATA_DIR}/{_SCRIPT_NAME}",
-        role=_ROLE,
+        role=ROLE,
     )

     step_create_model = ModelStep(name="mymodelstep", step_args=model.create())
@@ -1031,7 +976,7 @@ def test_model_step_with_lambda_property_reference(pipeline_session):
         (
             Processor(
                 image_uri=_IMAGE_URI,
-                role=_ROLE,
+                role=ROLE,
                 instance_count=1,
                 instance_type=_INSTANCE_TYPE,
             ),
@@ -1052,7 +997,7 @@ def test_model_step_with_lambda_property_reference(pipeline_session):
         (
             HyperparameterTuner(
                 estimator=Estimator(
-                    role=_ROLE,
+                    role=ROLE,
                     instance_count=1,
                     instance_type=_INSTANCE_TYPE,
                     image_uri=_IMAGE_URI,
@@ -1064,7 +1009,7 @@ def test_model_step_with_lambda_property_reference(pipeline_session):
         ),
         (
             Estimator(
-                role=_ROLE,
+                role=ROLE,
                 instance_count=1,
                 instance_type=_INSTANCE_TYPE,
                 image_uri=_IMAGE_URI,
             ),
@@ -1128,3 +1073,31 @@ def test_pass_in_wrong_type_of_retry_policies(pipeline_session, model):
             ),
         )
     assert "SageMakerJobStepRetryPolicy is not allowed for a create/registe" in str(error.value)
+
+
+def test_register_model_step_with_model_package_name(pipeline_session):
+    model = Model(
+        name="MyModel",
+        image_uri="my-image",
+        model_data="s3://",
+        sagemaker_session=pipeline_session,
+    )
+    step_args = model.register(
+        content_types=["text/csv"],
+        response_types=["text/csv"],
+        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
+        transform_instances=["ml.m5.xlarge"],
+        model_package_name="model-pkg-name-will-be-popped-out",
+    )
+    regis_model_step = ModelStep(
+        name="MyModelStep",
+        step_args=step_args,
+    )
+    pipeline = Pipeline(
+        name="MyPipeline",
+        steps=[regis_model_step],
+        sagemaker_session=pipeline_session,
+    )
+    steps = json.loads(pipeline.definition())["Steps"]
+    assert len(steps) == 1
+    assert "ModelPackageName" not in steps[0]["Arguments"]
diff --git a/tests/unit/sagemaker/workflow/test_utils.py b/tests/unit/sagemaker/workflow/test_utils.py
index dcbf5a6421..c8d86c5866 100644
--- a/tests/unit/sagemaker/workflow/test_utils.py
+++ b/tests/unit/sagemaker/workflow/test_utils.py
@@ -18,12 +18,6 @@
 import tempfile

 import pytest
-import sagemaker
-
-from mock import (
-    Mock,
-    PropertyMock,
-)

 from sagemaker.estimator import Estimator
 from sagemaker.workflow._utils import (
@@ -35,51 +29,7 @@
 from sagemaker.workflow.properties import Properties
 from tests.unit.test_utils import FakeS3, list_tar_files
 from tests.unit import DATA_DIR
-
-REGION = "us-west-2"
-BUCKET = "my-bucket"
-IMAGE_URI = "fakeimage"
-ROLE = "DummyRole"
-
-
-@pytest.fixture
-def boto_session():
-    role_mock = Mock()
-    type(role_mock).arn = PropertyMock(return_value=ROLE)
-
-    resource_mock = Mock()
-    resource_mock.Role.return_value = role_mock
-
-    session_mock = Mock(region_name=REGION)
-    session_mock.resource.return_value = resource_mock
-
-    return session_mock
-
-
-@pytest.fixture
-def client():
-    """Mock client.
-
-    Considerations when appropriate:
-
-    * utilize botocore.stub.Stubber
-    * separate runtime client from client
-    """
-    client_mock = Mock()
-    client_mock._client_config.user_agent = (
-        "Boto3/1.14.24 Python/3.8.5 Linux/5.4.0-42-generic Botocore/1.17.24 Resource"
-    )
-    return client_mock
-
-
-@pytest.fixture
-def sagemaker_session(boto_session, client):
-    return sagemaker.session.Session(
-        boto_session=boto_session,
-        sagemaker_client=client,
-        sagemaker_runtime_client=client,
-        default_bucket=BUCKET,
-    )
+from tests.unit.sagemaker.workflow.conftest import ROLE, IMAGE_URI, BUCKET


@@ -171,7 +121,7 @@ def test_repack_model_step(estimator):
     }


-def test_repack_model_step_with_invalid_input():
+def test_register_model_step_with_invalid_input():
     # without both step_args and any of the old required arguments
     with pytest.raises(ValueError) as error:
         _RegisterModelStep(