Skip to content

Revert "feature: Decouple model.right_size() from model registry" #3700

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class Phase:
"""

def __init__(self, duration_in_seconds: int, initial_number_of_users: int, spawn_rate: int):
"""Initialize a `Phase`"""
"""Initialze a `Phase`"""
self.to_json = {
"DurationInSeconds": duration_in_seconds,
"InitialNumberOfUsers": initial_number_of_users,
Expand All @@ -53,7 +53,7 @@ class ModelLatencyThreshold:
"""

def __init__(self, percentile: str, value_in_milliseconds: int):
"""Initialize a `ModelLatencyThreshold`"""
"""Initialze a `ModelLatencyThreshold`"""
self.to_json = {"Percentile": percentile, "ValueInMilliseconds": value_in_milliseconds}


Expand All @@ -79,12 +79,6 @@ def right_size(
):
"""Recommends an instance type for a SageMaker or BYOC model.

Create a SageMaker ``Model`` or use a registered ``ModelPackage``,
to start an Inference Recommender job.

The name of the created model is accessible in the ``name`` field of
this ``Model`` after right_size returns.

Args:
sample_payload_url (str): The S3 path where the sample payload is stored.
supported_content_types: (list[str]): The supported MIME types for the input data.
Expand Down Expand Up @@ -125,6 +119,8 @@ def right_size(
sagemaker.model.Model: A SageMaker ``Model`` object. See
:func:`~sagemaker.model.Model` for full details.
"""
if not isinstance(self, sagemaker.model.ModelPackage):
raise ValueError("right_size() is currently only supported with a registered model")

if not framework and self._framework():
framework = INFERENCE_RECOMMENDER_FRAMEWORK_MAPPING.get(self._framework(), framework)
Expand Down Expand Up @@ -153,36 +149,12 @@ def right_size(

self._init_sagemaker_session_if_does_not_exist()

if isinstance(self, sagemaker.model.Model) and not isinstance(
self, sagemaker.model.ModelPackage
):
primary_container_def = self.prepare_container_def()
if not self.name:
self._ensure_base_name_if_needed(
image_uri=primary_container_def["Image"],
script_uri=self.source_dir,
model_uri=self.model_data,
)
self._set_model_name_if_needed()

create_model_args = dict(
name=self.name,
role=self.role,
container_defs=None,
primary_container=primary_container_def,
vpc_config=self.vpc_config,
enable_network_isolation=self.enable_network_isolation(),
)
LOGGER.warning("Attempting to create new model with name %s", self.name)
self.sagemaker_session.create_model(**create_model_args)

ret_name = self.sagemaker_session.create_inference_recommendations_job(
role=self.role,
job_name=job_name,
job_type=job_type,
job_duration_in_seconds=job_duration_in_seconds,
model_name=self.name,
model_package_version_arn=getattr(self, "model_package_arn", None),
model_package_version_arn=self.model_package_arn,
framework=framework,
framework_version=framework_version,
sample_payload_url=sample_payload_url,
Expand Down
18 changes: 1 addition & 17 deletions src/sagemaker/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -4820,7 +4820,6 @@ def _create_inference_recommendations_job_request(
framework: str,
sample_payload_url: str,
supported_content_types: List[str],
model_name: str = None,
model_package_version_arn: str = None,
job_duration_in_seconds: int = None,
job_type: str = "Default",
Expand All @@ -4844,7 +4843,6 @@ def _create_inference_recommendations_job_request(
framework (str): The machine learning framework of the Image URI.
sample_payload_url (str): The S3 path where the sample payload is stored.
supported_content_types (List[str]): The supported MIME types for the input data.
model_name (str): Name of the Amazon SageMaker ``Model`` to be used.
model_package_version_arn (str): The Amazon Resource Name (ARN) of a
versioned model package.
job_duration_in_seconds (int): The maximum job duration that a job
Expand Down Expand Up @@ -4892,15 +4890,10 @@ def _create_inference_recommendations_job_request(
"RoleArn": role,
"InputConfig": {
"ContainerConfig": containerConfig,
"ModelPackageVersionArn": model_package_version_arn,
},
}

request.get("InputConfig").update(
{"ModelPackageVersionArn": model_package_version_arn}
if model_package_version_arn
else {"ModelName": model_name}
)

if job_description:
request["JobDescription"] = job_description
if job_duration_in_seconds:
Expand All @@ -4925,7 +4918,6 @@ def create_inference_recommendations_job(
supported_content_types: List[str],
job_name: str = None,
job_type: str = "Default",
model_name: str = None,
model_package_version_arn: str = None,
job_duration_in_seconds: int = None,
nearest_model_name: str = None,
Expand All @@ -4946,7 +4938,6 @@ def create_inference_recommendations_job(
You must grant sufficient permissions to this role.
sample_payload_url (str): The S3 path where the sample payload is stored.
supported_content_types (List[str]): The supported MIME types for the input data.
model_name (str): Name of the Amazon SageMaker ``Model`` to be used.
model_package_version_arn (str): The Amazon Resource Name (ARN) of a
versioned model package.
job_name (str): The name of the job being run.
Expand All @@ -4973,12 +4964,6 @@ def create_inference_recommendations_job(
str: The name of the job created. In the form of `SMPYTHONSDK-<timestamp>`
"""

if model_name is None and model_package_version_arn is None:
raise ValueError("Please provide either model_name or model_package_version_arn.")

if model_name is not None and model_package_version_arn is not None:
raise ValueError("Please provide either model_name or model_package_version_arn.")

if not job_name:
unique_tail = uuid.uuid4()
job_name = "SMPYTHONSDK-" + str(unique_tail)
Expand All @@ -4987,7 +4972,6 @@ def create_inference_recommendations_job(
create_inference_recommendations_job_request = (
self._create_inference_recommendations_job_request(
role=role,
model_name=model_name,
model_package_version_arn=model_package_version_arn,
job_name=job_name,
job_type=job_type,
Expand Down
181 changes: 0 additions & 181 deletions tests/integ/test_inference_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import pytest

from sagemaker.model import Model
from sagemaker.sklearn.model import SKLearnModel, SKLearnPredictor
from sagemaker.utils import unique_name_from_base
from tests.integ import DATA_DIR
Expand Down Expand Up @@ -155,120 +154,6 @@ def advanced_right_sized_model(sagemaker_session, cpu_instance_type):
)


@pytest.fixture(scope="module")
def default_right_sized_unregistered_model(sagemaker_session, cpu_instance_type):
with timeout(minutes=45):
try:
ir_job_name = unique_name_from_base("test-ir-right-size-job-name")
model_data = sagemaker_session.upload_data(path=IR_SKLEARN_MODEL)
payload_data = sagemaker_session.upload_data(path=IR_SKLEARN_PAYLOAD)

iam_client = sagemaker_session.boto_session.client("iam")
role_arn = iam_client.get_role(RoleName="SageMakerRole")["Role"]["Arn"]

sklearn_model = SKLearnModel(
model_data=model_data,
role=role_arn,
entry_point=IR_SKLEARN_ENTRY_POINT,
framework_version=IR_SKLEARN_FRAMEWORK_VERSION,
)

return (
sklearn_model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
),
ir_job_name,
)
except Exception:
sagemaker_session.delete_model(ModelName=sklearn_model.name)


@pytest.fixture(scope="module")
def advanced_right_sized_unregistered_model(sagemaker_session, cpu_instance_type):
with timeout(minutes=45):
try:
model_data = sagemaker_session.upload_data(path=IR_SKLEARN_MODEL)
payload_data = sagemaker_session.upload_data(path=IR_SKLEARN_PAYLOAD)

iam_client = sagemaker_session.boto_session.client("iam")
role_arn = iam_client.get_role(RoleName="SageMakerRole")["Role"]["Arn"]

sklearn_model = SKLearnModel(
model_data=model_data,
role=role_arn,
entry_point=IR_SKLEARN_ENTRY_POINT,
framework_version=IR_SKLEARN_FRAMEWORK_VERSION,
)

hyperparameter_ranges = [
{
"instance_types": CategoricalParameter([cpu_instance_type]),
"TEST_PARAM": CategoricalParameter(
["TEST_PARAM_VALUE_1", "TEST_PARAM_VALUE_2"]
),
}
]

phases = [
Phase(duration_in_seconds=300, initial_number_of_users=2, spawn_rate=2),
Phase(duration_in_seconds=300, initial_number_of_users=14, spawn_rate=2),
]

model_latency_thresholds = [
ModelLatencyThreshold(percentile="P95", value_in_milliseconds=100)
]

return sklearn_model.right_size(
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
framework=IR_SKLEARN_FRAMEWORK,
job_duration_in_seconds=3600,
hyperparameter_ranges=hyperparameter_ranges,
phases=phases,
model_latency_thresholds=model_latency_thresholds,
max_invocations=100,
max_tests=5,
max_parallel_tests=5,
log_level="Quiet",
)

except Exception:
sagemaker_session.delete_model(ModelName=sklearn_model.name)


@pytest.fixture(scope="module")
def default_right_sized_unregistered_base_model(sagemaker_session, cpu_instance_type):
with timeout(minutes=45):
try:
ir_job_name = unique_name_from_base("test-ir-right-size-job-name")
model_data = sagemaker_session.upload_data(path=IR_SKLEARN_MODEL)
payload_data = sagemaker_session.upload_data(path=IR_SKLEARN_PAYLOAD)

iam_client = sagemaker_session.boto_session.client("iam")
role_arn = iam_client.get_role(RoleName="SageMakerRole")["Role"]["Arn"]

model = Model(model_data=model_data, role=role_arn, entry_point=IR_SKLEARN_ENTRY_POINT)

return (
model.right_size(
job_name=ir_job_name,
sample_payload_url=payload_data,
supported_content_types=IR_SKLEARN_CONTENT_TYPE,
supported_instance_types=[cpu_instance_type],
framework=IR_SKLEARN_FRAMEWORK,
log_level="Quiet",
),
ir_job_name,
)
except Exception:
sagemaker_session.delete_model(ModelName=model.name)


@pytest.mark.slow_test
def test_default_right_size_and_deploy_registered_model_sklearn(
default_right_sized_model, sagemaker_session
Expand All @@ -291,72 +176,6 @@ def test_default_right_size_and_deploy_registered_model_sklearn(
predictor.delete_endpoint()


@pytest.mark.slow_test
def test_default_right_size_and_deploy_unregistered_model_sklearn(
default_right_sized_unregistered_model, sagemaker_session
):
endpoint_name = unique_name_from_base("test-ir-right-size-default-unregistered-sklearn")

right_size_model, ir_job_name = default_right_sized_unregistered_model
with timeout(minutes=45):
try:
right_size_model.predictor_cls = SKLearnPredictor
predictor = right_size_model.deploy(endpoint_name=endpoint_name)

payload = pd.read_csv(IR_SKLEARN_DATA, header=None)

inference = predictor.predict(payload)
assert inference is not None
assert 26 == len(inference)
finally:
predictor.delete_model()
predictor.delete_endpoint()


@pytest.mark.slow_test
def test_default_right_size_and_deploy_unregistered_base_model(
default_right_sized_unregistered_base_model, sagemaker_session
):
endpoint_name = unique_name_from_base("test-ir-right-size-default-unregistered-base")

right_size_model, ir_job_name = default_right_sized_unregistered_base_model
with timeout(minutes=45):
try:
right_size_model.predictor_cls = SKLearnPredictor
predictor = right_size_model.deploy(endpoint_name=endpoint_name)

payload = pd.read_csv(IR_SKLEARN_DATA, header=None)

inference = predictor.predict(payload)
assert inference is not None
assert 26 == len(inference)
finally:
predictor.delete_model()
predictor.delete_endpoint()


@pytest.mark.slow_test
def test_advanced_right_size_and_deploy_unregistered_model_sklearn(
advanced_right_sized_unregistered_model, sagemaker_session
):
endpoint_name = unique_name_from_base("test-ir-right-size-advanced-sklearn")

right_size_model = advanced_right_sized_unregistered_model
with timeout(minutes=45):
try:
right_size_model.predictor_cls = SKLearnPredictor
predictor = right_size_model.deploy(endpoint_name=endpoint_name)

payload = pd.read_csv(IR_SKLEARN_DATA, header=None)

inference = predictor.predict(payload)
assert inference is not None
assert 26 == len(inference)
finally:
predictor.delete_model()
predictor.delete_endpoint()


@pytest.mark.slow_test
def test_advanced_right_size_and_deploy_registered_model_sklearn(
advanced_right_sized_model, sagemaker_session
Expand Down
Loading