diff --git a/doc/overview.rst b/doc/overview.rst index d2a5a8fb87..df340338d9 100644 --- a/doc/overview.rst +++ b/doc/overview.rst @@ -1226,28 +1226,28 @@ to configure or manage the underlying infrastructure. After you trained a model, Serverless endpoint and then invoke the endpoint with the model to get inference results back. More information about SageMaker Serverless Inference can be found in the `AWS documentation `__. -For using SageMaker Serverless Inference, if you plan to use any of the SageMaker-provided container or Bring Your Own Container -model, you will need to pass ``image_uri``. An example to use ``image_uri`` for creating MXNet model: +To use SageMaker Serverless Inference, you can either use a SageMaker-provided container or bring your own container. +A step-by-step example of using Serverless Inference with the MXNet image: + +First, create an MXNet model: .. code:: python from sagemaker.mxnet import MXNetModel + from sagemaker.serverless import ServerlessInferenceConfig import sagemaker role = sagemaker.get_execution_role() # create MXNet Model Class - mxnet_model = MXNetModel( + model = MXNetModel( model_data="s3://my_bucket/pretrained_model/model.tar.gz", # path to your trained sagemaker model role=role, # iam role with permissions to create an Endpoint entry_point="inference.py", - image_uri="763104351884.dkr.ecr.us-west-2.amazonaws.com/mxnet-inference:1.4.1-cpu-py3" # image wanted to use + py_version="py3", # Python version + framework_version="1.6.0", # MXNet framework version ) -For more Amazon SageMaker provided algorithms and containers image paths, please check this page: `Amazon SageMaker provided -algorithms and Deep Learning Containers `_. -After creating model using ``image_uri``, you can then follow the steps below to create serverless endpoint. - To deploy serverless endpoint, you will need to create a ``ServerlessInferenceConfig``. If you create ``ServerlessInferenceConfig`` without specifying its arguments, the default ``MemorySizeInMB`` will be **2048** and the default ``MaxConcurrency`` will be **5** : @@ -1283,7 +1283,6 @@ Or directly using model's ``deploy()`` method to deploy a serverless endpoint: # Deploys the model to a SageMaker serverless endpoint serverless_predictor = model.deploy(serverless_inference_config=serverless_config) - After deployment is complete, you can use predictor's ``predict()`` method to invoke the serverless endpoint just like real-time endpoints:
diff --git a/src/sagemaker/chainer/model.py b/src/sagemaker/chainer/model.py index 8e63204cf8..41e3a6e838 100644 --- a/src/sagemaker/chainer/model.py +++ b/src/sagemaker/chainer/model.py @@ -99,13 +99,10 @@ def __init__( file which should be executed as the entry point to model hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. - image_uri (str): A Docker image URI (default: None). In serverless - inferece, it is required. More image information can be found in - `Amazon SageMaker provided algorithms and Deep Learning Containers - `_. - In instance based inference, if not specified, a default image for - Chainer will be used. If ``framework_version`` or ``py_version`` - are ``None``, then ``image_uri`` is required. If also ``None``, + image_uri (str): A Docker image URI (default: None). If not specified, + a default image for Chainer will be used. + If ``framework_version`` or ``py_version`` + are ``None``, then ``image_uri`` is required.
If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. framework_version (str): Chainer version you want to use for executing your model training code. Defaults to ``None``. Required @@ -143,7 +140,9 @@ def __init__( self.model_server_workers = model_server_workers - def prepare_container_def(self, instance_type=None, accelerator_type=None): + def prepare_container_def( + self, instance_type=None, accelerator_type=None, serverless_inference_config=None + ): """Return a container definition with framework configuration set in model environment. Args: @@ -152,6 +151,9 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model. For example, 'ml.eia1.medium'. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. Returns: dict[str, str]: A container definition object usable with the @@ -159,14 +161,17 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): """ deploy_image = self.image_uri if not deploy_image: - if instance_type is None: + if instance_type is None and serverless_inference_config is None: raise ValueError( "Must supply either an instance type (for choosing CPU vs GPU) or an image URI." ) region_name = self.sagemaker_session.boto_session.region_name deploy_image = self.serving_image_uri( - region_name, instance_type, accelerator_type=accelerator_type + region_name, + instance_type, + accelerator_type=accelerator_type, + serverless_inference_config=serverless_inference_config, ) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image) @@ -178,13 +183,18 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers) return sagemaker.container_def(deploy_image, self.model_data, deploy_env) - def serving_image_uri(self, region_name, instance_type, accelerator_type=None): + def serving_image_uri( + self, region_name, instance_type, accelerator_type=None, serverless_inference_config=None + ): """Create a URI for the serving image. Args: region_name (str): AWS region where the image is uploaded. instance_type (str): SageMaker instance type. Used to determine device type (cpu/gpu/family-specific optimized). + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to determine device type. Returns: str: The appropriate image URI based on the given parameters. @@ -198,4 +208,5 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None): instance_type=instance_type, accelerator_type=accelerator_type, image_scope="inference", + serverless_inference_config=serverless_inference_config, ) diff --git a/src/sagemaker/huggingface/model.py b/src/sagemaker/huggingface/model.py index 4de5770242..d1e876fa21 100644 --- a/src/sagemaker/huggingface/model.py +++ b/src/sagemaker/huggingface/model.py @@ -133,11 +133,7 @@ def __init__( py_version (str): Python version you want to use for executing your model training code. Defaults to ``None``. Required unless ``image_uri`` is provided. - image_uri (str): A Docker image URI. Defaults to None. 
For serverless - inferece, it is required. More image information can be found in - `Amazon SageMaker provided algorithms and Deep Learning Containers - `_. - For instance based inference, if not specified, a + image_uri (str): A Docker image URI. Defaults to None. If not specified, a default image for PyTorch will be used. If ``framework_version`` or ``py_version`` are ``None``, then ``image_uri`` is required. If also ``None``, then a ``ValueError`` will be raised.
@@ -272,7 +268,7 @@ is not None. Otherwise, return None. """ - if not self.image_uri and instance_type.startswith("ml.inf"): + if not self.image_uri and instance_type is not None and instance_type.startswith("ml.inf"): self.image_uri = self.serving_image_uri( region_name=self.sagemaker_session.boto_session.region_name, instance_type=instance_type,
@@ -365,7 +361,9 @@ drift_check_baselines=drift_check_baselines, ) - def prepare_container_def(self, instance_type=None, accelerator_type=None): + def prepare_container_def( + self, instance_type=None, accelerator_type=None, serverless_inference_config=None + ): """A container definition with framework configuration set in model environment variables. Args:
@@ -374,6 +372,9 @@ accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. Returns: dict[str, str]: A container definition object usable with the
@@ -381,14 +382,17 @@ deploy_image = self.image_uri if not deploy_image: - if instance_type is None: + if instance_type is None and serverless_inference_config is None: raise ValueError( "Must supply either an instance type (for choosing CPU vs GPU) or an image URI." ) region_name = self.sagemaker_session.boto_session.region_name deploy_image = self.serving_image_uri( - region_name, instance_type, accelerator_type=accelerator_type + region_name, + instance_type, + accelerator_type=accelerator_type, + serverless_inference_config=serverless_inference_config, ) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
@@ -402,7 +406,13 @@ deploy_image, self.repacked_model_data or self.model_data, deploy_env ) - def serving_image_uri(self, region_name, instance_type, accelerator_type=None): + def serving_image_uri( + self, + region_name, + instance_type=None, + accelerator_type=None, + serverless_inference_config=None, + ): """Create a URI for the serving image. Args:
@@ -412,6 +422,9 @@ accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to determine device type. Returns: str: The appropriate image URI based on the given parameters.
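To make the ``prepare_container_def`` changes above concrete, here is a minimal sketch (not part of the patch) of how the new ``serverless_inference_config`` keyword is intended to flow through; the S3 path, role name, and framework versions are placeholders:

.. code:: python

    from sagemaker.mxnet import MXNetModel
    from sagemaker.serverless import ServerlessInferenceConfig

    model = MXNetModel(
        model_data="s3://my_bucket/pretrained_model/model.tar.gz",  # placeholder artifact
        role="SageMakerRole",  # placeholder IAM role
        entry_point="inference.py",
        framework_version="1.6.0",
        py_version="py3",
    )

    # Before this change, omitting instance_type here raised:
    #   "Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
    # With a ServerlessInferenceConfig, the SDK can now resolve a CPU image instead.
    container_def = model.prepare_container_def(
        serverless_inference_config=ServerlessInferenceConfig()
    )
    print(container_def["Image"])  # a CPU MXNet inference image URI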
@@ -432,4 +445,5 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None): accelerator_type=accelerator_type, image_scope="inference", base_framework_version=base_framework_version, + serverless_inference_config=serverless_inference_config, ) diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py index bec22b4d78..2428b1ca85 100644 --- a/src/sagemaker/image_uris.py +++ b/src/sagemaker/image_uris.py @@ -48,6 +48,7 @@ def retrieve( tolerate_deprecated_model=False, sdk_version=None, inference_tool=None, + serverless_inference_config=None, ) -> str: """Retrieves the ECR URI for the Docker image matching the given arguments. @@ -94,6 +95,9 @@ def retrieve( inference_tool (str): the tool that will be used to aid in the inference. Valid values: "neuron, None" (default: None). + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to determine processor type. Returns: str: The ECR URI for the corresponding SageMaker Docker image. @@ -159,7 +163,9 @@ def retrieve( repo = version_config["repository"] processor = _processor( - instance_type, config.get("processors") or version_config.get("processors") + instance_type, + config.get("processors") or version_config.get("processors"), + serverless_inference_config, ) # if container version is available in .json file, utilize that @@ -202,7 +208,9 @@ def retrieve( tag = _format_tag(tag_prefix, processor, py_version, container_version, inference_tool) - if _should_auto_select_container_version(instance_type, distribution): + if instance_type is not None and _should_auto_select_container_version( + instance_type, distribution + ): container_versions = { "tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3", "tensorflow-2.3.1-gpu-py37": "cu110-ubuntu18.04", @@ -327,7 +335,7 @@ def _registry_from_region(region, registry_dict): return registry_dict[region] -def _processor(instance_type, available_processors): +def _processor(instance_type, available_processors, serverless_inference_config=None): """Returns the processor type for the given instance type.""" if not available_processors: logger.info("Ignoring unnecessary instance type: %s.", instance_type) @@ -337,6 +345,10 @@ def _processor(instance_type, available_processors): logger.info("Defaulting to only supported image scope: %s.", available_processors[0]) return available_processors[0] + if serverless_inference_config is not None: + logger.info("Defaulting to CPU type when using serverless inference") + return "cpu" + if not instance_type: raise ValueError( "Empty SageMaker instance type. For options, see: " diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index ffa6cf1a84..bfdfcdb2c9 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -383,7 +383,10 @@ def _init_sagemaker_session_if_does_not_exist(self, instance_type=None): self.sagemaker_session = session.Session() def prepare_container_def( - self, instance_type=None, accelerator_type=None + self, + instance_type=None, + accelerator_type=None, + serverless_inference_config=None, ): # pylint: disable=unused-argument """Return a dict created by ``sagemaker.container_def()``. @@ -398,6 +401,9 @@ def prepare_container_def( accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model. For example, 'ml.eia1.medium'. 
+ serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. Returns: dict: A container definition object usable with the CreateModel API. @@ -498,7 +504,9 @@ def enable_network_isolation(self): """ return self._enable_network_isolation - def _create_sagemaker_model(self, instance_type=None, accelerator_type=None, tags=None): + def _create_sagemaker_model( + self, instance_type=None, accelerator_type=None, tags=None, serverless_inference_config=None + ): """Create a SageMaker Model Entity Args: @@ -514,8 +522,15 @@ def _create_sagemaker_model(self, instance_type=None, accelerator_type=None, tag 'tagvalue'}] For more information about tags, see https://boto3.amazonaws.com/v1/documentation /api/latest/reference/services/sagemaker.html#SageMaker.Client.add_tags + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. """ - container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type) + container_def = self.prepare_container_def( + instance_type, + accelerator_type=accelerator_type, + serverless_inference_config=serverless_inference_config, + ) self._ensure_base_name_if_needed( image_uri=container_def["Image"], script_uri=self.source_dir, model_uri=self.model_data @@ -983,7 +998,9 @@ def deploy( if self._base_name is not None: self._base_name = "-".join((self._base_name, compiled_model_suffix)) - self._create_sagemaker_model(instance_type, accelerator_type, tags) + self._create_sagemaker_model( + instance_type, accelerator_type, tags, serverless_inference_config + ) serverless_inference_config_dict = ( serverless_inference_config._to_request_dict() if is_serverless else None diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index c8582c552c..f9fee22c6f 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -107,14 +107,10 @@ def __init__( py_version (str): Python version you want to use for executing your model training code. Defaults to ``None``. Required unless ``image_uri`` is provided. - image_uri (str): A Docker image URI (default: None). For serverless - inferece, it is required. More image information can be found in - `Amazon SageMaker provided algorithms and Deep Learning Containers - `_. - For instance based inference, if not specified, a default image for - MXNet will be used. + image_uri (str): A Docker image URI (default: None). If not specified, + a default image for MXNet will be used. If ``framework_version`` or ``py_version`` are ``None``, then - ``image_uri`` is required. If also ``None``, then a ``ValueError`` + ``image_uri`` is required. If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor with an endpoint name and @@ -220,7 +216,9 @@ def register( customer_metadata_properties=customer_metadata_properties, ) - def prepare_container_def(self, instance_type=None, accelerator_type=None): + def prepare_container_def( + self, instance_type=None, accelerator_type=None, serverless_inference_config=None + ): """Return a container definition with framework configuration. Framework configuration is set in model environment variables. 
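The ``image_uris.py`` change above is the piece that makes image resolution work without an instance type: ``_processor()`` now logs "Defaulting to CPU type when using serverless inference" and returns ``"cpu"`` whenever a ``serverless_inference_config`` is passed. A sketch of calling the public entry point directly (the region and version values are illustrative, not taken from the patch):

.. code:: python

    from sagemaker import image_uris
    from sagemaker.serverless import ServerlessInferenceConfig

    # No instance_type is supplied; the serverless config alone selects a CPU tag.
    uri = image_uris.retrieve(
        framework="mxnet",
        region="us-west-2",
        version="1.6.0",
        py_version="py3",
        image_scope="inference",
        serverless_inference_config=ServerlessInferenceConfig(),
    )
    print(uri)  # e.g. an mxnet-inference ...-cpu-py3 image for us-west-2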
@@ -231,6 +229,9 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model. For example, 'ml.eia1.medium'. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. Returns: dict[str, str]: A container definition object usable with the @@ -238,14 +239,17 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): """ deploy_image = self.image_uri if not deploy_image: - if instance_type is None: + if instance_type is None and serverless_inference_config is None: raise ValueError( "Must supply either an instance type (for choosing CPU vs GPU) or an image URI." ) region_name = self.sagemaker_session.boto_session.region_name deploy_image = self.serving_image_uri( - region_name, instance_type, accelerator_type=accelerator_type + region_name, + instance_type, + accelerator_type=accelerator_type, + serverless_inference_config=serverless_inference_config, ) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image) @@ -259,7 +263,9 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): deploy_image, self.repacked_model_data or self.model_data, deploy_env ) - def serving_image_uri(self, region_name, instance_type, accelerator_type=None): + def serving_image_uri( + self, region_name, instance_type, accelerator_type=None, serverless_inference_config=None + ): """Create a URI for the serving image. Args: @@ -269,6 +275,9 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None): accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model (default: None). For example, 'ml.eia1.medium'. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to determine device type. Returns: str: The appropriate image URI based on the given parameters. @@ -282,6 +291,7 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None): instance_type=instance_type, accelerator_type=accelerator_type, image_scope="inference", + serverless_inference_config=serverless_inference_config, ) def _is_mms_version(self): diff --git a/src/sagemaker/pytorch/model.py b/src/sagemaker/pytorch/model.py index 0837ac742c..2b45a4c506 100644 --- a/src/sagemaker/pytorch/model.py +++ b/src/sagemaker/pytorch/model.py @@ -107,13 +107,10 @@ def __init__( py_version (str): Python version you want to use for executing your model training code. Defaults to ``None``. Required unless ``image_uri`` is provided. - image_uri (str): A Docker image URI (default: None). For serverless - inferece, it is required. More image information can be found in - `Amazon SageMaker provided algorithms and Deep Learning Containers - `_. - For instance based inference, if not specified, a default image for - PyTorch will be used. If ``framework_version`` or ``py_version`` are - ``None``, then ``image_uri`` is required. If also ``None``, then a + image_uri (str): A Docker image URI (default: None). If not specified, + a default image for PyTorch will be used. 
+ If ``framework_version`` or ``py_version`` are + ``None``, then ``image_uri`` is required. If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor with an endpoint name and @@ -220,7 +217,9 @@ def register( customer_metadata_properties=customer_metadata_properties, ) - def prepare_container_def(self, instance_type=None, accelerator_type=None): + def prepare_container_def( + self, instance_type=None, accelerator_type=None, serverless_inference_config=None + ): """A container definition with framework configuration set in model environment variables. Args: @@ -229,6 +228,9 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. Returns: dict[str, str]: A container definition object usable with the @@ -236,14 +238,17 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): """ deploy_image = self.image_uri if not deploy_image: - if instance_type is None: + if instance_type is None and serverless_inference_config is None: raise ValueError( "Must supply either an instance type (for choosing CPU vs GPU) or an image URI." ) region_name = self.sagemaker_session.boto_session.region_name deploy_image = self.serving_image_uri( - region_name, instance_type, accelerator_type=accelerator_type + region_name, + instance_type, + accelerator_type=accelerator_type, + serverless_inference_config=serverless_inference_config, ) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image) @@ -257,7 +262,9 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): deploy_image, self.repacked_model_data or self.model_data, deploy_env ) - def serving_image_uri(self, region_name, instance_type, accelerator_type=None): + def serving_image_uri( + self, region_name, instance_type, accelerator_type=None, serverless_inference_config=None + ): """Create a URI for the serving image. Args: @@ -267,6 +274,9 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None): accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to determine device type. Returns: str: The appropriate image URI based on the given parameters. @@ -280,6 +290,7 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None): instance_type=instance_type, accelerator_type=accelerator_type, image_scope="inference", + serverless_inference_config=serverless_inference_config, ) def _is_mms_version(self): diff --git a/src/sagemaker/sklearn/model.py b/src/sagemaker/sklearn/model.py index b9427fc25c..4cdfb99af7 100644 --- a/src/sagemaker/sklearn/model.py +++ b/src/sagemaker/sklearn/model.py @@ -102,14 +102,10 @@ def __init__( model training code (default: 'py3'). Currently, 'py3' is the only supported version. If ``None`` is passed in, ``image_uri`` must be provided. 
- image_uri (str): A Docker image URI (default: None). For serverless - inferece, it is required. More image information can be found in - `Amazon SageMaker provided algorithms and Deep Learning Containers - `_. - For instance based inference, if not specified, a default image for - Scikit-learn will be used. + image_uri (str): A Docker image URI (default: None). If not specified, + a default image for Scikit-learn will be used. If ``framework_version`` or ``py_version`` are ``None``, then - ``image_uri`` is required. If also ``None``, then a ``ValueError`` + ``image_uri`` is required. If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. predictor_cls (callable[str, sagemaker.session.Session]): A function to call to create a predictor with an endpoint name and @@ -208,7 +204,9 @@ def register( description, ) - def prepare_container_def(self, instance_type=None, accelerator_type=None): + def prepare_container_def( + self, instance_type=None, accelerator_type=None, serverless_inference_config=None + ): """Container definition with framework configuration set in model environment variables. Args: @@ -218,6 +216,9 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): deploy to the instance for loading and making inferences to the model. This parameter is unused because accelerator types are not supported by SKLearnModel. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. Returns: dict[str, str]: A container definition object usable with the @@ -244,12 +245,16 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): ) return sagemaker.container_def(deploy_image, model_data_uri, deploy_env) - def serving_image_uri(self, region_name, instance_type): + def serving_image_uri(self, region_name, instance_type, serverless_inference_config=None): """Create a URI for the serving image. Args: region_name (str): AWS region where the image is uploaded. instance_type (str): SageMaker instance type. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to determine device type. + Returns: str: The appropriate image URI based on the given parameters. @@ -261,4 +266,5 @@ def serving_image_uri(self, region_name, instance_type): version=self.framework_version, py_version=self.py_version, instance_type=instance_type, + serverless_inference_config=serverless_inference_config, ) diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index 0b7c369f48..6b45a8eaf0 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -145,13 +145,10 @@ def __init__( file which should be executed as the entry point to model hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. - image_uri (str): A Docker image URI (default: None). For serverless - inferece, it is required. More image information can be found in - `Amazon SageMaker provided algorithms and Deep Learning Containers - `_. - For instance based inference, if not specified, a default image for - TensorFlow Serving will be used. If ``framework_version`` is ``None``, - then ``image_uri`` is required. 
If also ``None``, then a ``ValueError`` + image_uri (str): A Docker image URI (default: None). If not specified, + a default image for TensorFlow Serving will be used. + If ``framework_version`` is ``None``, then ``image_uri`` is required. + If ``image_uri`` is also ``None``, then a ``ValueError`` will be raised. framework_version (str): Optional. TensorFlow Serving version you want to use. Defaults to ``None``. Required unless ``image_uri`` is @@ -312,22 +309,30 @@ def _eia_supported(self): and framework_version <= self.LATEST_EIA_VERSION ) - def prepare_container_def(self, instance_type=None, accelerator_type=None): + def prepare_container_def( + self, instance_type=None, accelerator_type=None, serverless_inference_config=None + ): """Prepare the container definition. Args: instance_type: Instance type of the container. accelerator_type: Accelerator type, if applicable. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. Returns: A container definition for deploying a ``Model`` to an ``Endpoint``. """ - if self.image_uri is None and instance_type is None: - raise ValueError( - "Must supply either an instance type (for choosing CPU vs GPU) or an image URI." - ) + if not self.image_uri: + if instance_type is None and serverless_inference_config is None: + raise ValueError( + "Must supply either an instance type (for choosing CPU vs GPU) or an image URI." + ) - image_uri = self._get_image_uri(instance_type, accelerator_type) + image_uri = self._get_image_uri( + instance_type, accelerator_type, serverless_inference_config=serverless_inference_config + ) env = self._get_container_env() if self.entry_point: @@ -365,7 +370,13 @@ def _get_container_env(self): env[self.LOG_LEVEL_PARAM_NAME] = self.LOG_LEVEL_MAP[self._container_log_level] return env - def _get_image_uri(self, instance_type, accelerator_type=None, region_name=None): + def _get_image_uri( + self, + instance_type, + accelerator_type=None, + region_name=None, + serverless_inference_config=None, + ): """Placeholder docstring.""" if self.image_uri: return self.image_uri @@ -377,10 +388,11 @@ def _get_image_uri(self, instance_type, accelerator_type=None, region_name=None) instance_type=instance_type, accelerator_type=accelerator_type, image_scope="inference", + serverless_inference_config=serverless_inference_config, ) def serving_image_uri( - self, region_name, instance_type, accelerator_type=None + self, region_name, instance_type, accelerator_type=None, serverless_inference_config=None ): # pylint: disable=unused-argument """Create a URI for the serving image. @@ -391,11 +403,17 @@ def serving_image_uri( accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model (default: None). For example, 'ml.eia1.medium'. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to determine device type. Returns: str: The appropriate image URI based on the given parameters. 
""" return self._get_image_uri( - instance_type=instance_type, accelerator_type=accelerator_type, region_name=region_name + instance_type=instance_type, + accelerator_type=accelerator_type, + region_name=region_name, + serverless_inference_config=serverless_inference_config, ) diff --git a/src/sagemaker/xgboost/model.py b/src/sagemaker/xgboost/model.py index e78b1cd151..2b90eea0f2 100644 --- a/src/sagemaker/xgboost/model.py +++ b/src/sagemaker/xgboost/model.py @@ -91,12 +91,8 @@ def __init__( entry_point (str): Path (absolute or relative) to the Python source file which should be executed as the entry point to model hosting. If ``source_dir`` is specified, then ``entry_point`` must point to a file located at the root of ``source_dir``. - image_uri (str): A Docker image URI (default: None). For serverless inferece, it is - required. More image information can be found in - `Amazon SageMaker provided algorithms and Deep Learning Containers - `_. - For instance based inference, if not specified, a default image for XGBoost - is be used. + image_uri (str): A Docker image URI (default: None). If not specified, + a default image for XGBoost is be used. py_version (str): Python version you want to use for executing your model training code (default: 'py3'). framework_version (str): XGBoost version you want to use for executing your model @@ -128,7 +124,9 @@ def __init__( validate_py_version(py_version) validate_framework_version(framework_version) - def prepare_container_def(self, instance_type=None, accelerator_type=None): + def prepare_container_def( + self, instance_type=None, accelerator_type=None, serverless_inference_config=None + ): """Return a container definition with framework configuration. The framework configuration is set in model environment variables. @@ -138,6 +136,9 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): accelerator_type (str): The Elastic Inference accelerator type to deploy to the instance for loading and making inferences to the model. This parameter is unused because accelerator types are not supported by XGBoostModel. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. Instance type is + not provided in serverless inference. So this is used to find image URIs. Returns: dict[str, str]: A container definition object usable with the CreateModel API. @@ -145,7 +146,9 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): deploy_image = self.image_uri if not deploy_image: deploy_image = self.serving_image_uri( - self.sagemaker_session.boto_region_name, instance_type + self.sagemaker_session.boto_region_name, + instance_type, + serverless_inference_config=serverless_inference_config, ) deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image) @@ -160,12 +163,16 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): ) return sagemaker.container_def(deploy_image, model_data, deploy_env) - def serving_image_uri(self, region_name, instance_type): + def serving_image_uri(self, region_name, instance_type, serverless_inference_config=None): """Create a URI for the serving image. Args: region_name (str): AWS region where the image is uploaded. instance_type (str): SageMaker instance type. Must be a CPU instance type. + serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig): + Specifies configuration related to serverless endpoint. 
Instance type is + not provided in serverless inference. So this is used to determine device type. + Returns: str: The appropriate image URI based on the given parameters. @@ -175,4 +182,5 @@ def serving_image_uri(self, region_name, instance_type): region_name, version=self.framework_version, instance_type=instance_type, + serverless_inference_config=serverless_inference_config, ) diff --git a/tests/integ/test_factorization_machines.py b/tests/integ/test_factorization_machines.py index d07bf9bf6b..08f3dd003b 100644 --- a/tests/integ/test_factorization_machines.py +++ b/tests/integ/test_factorization_machines.py @@ -17,6 +17,7 @@ import pytest from sagemaker import FactorizationMachines, FactorizationMachinesModel +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import datasets, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -103,3 +104,42 @@ def test_async_factorization_machines(sagemaker_session, cpu_instance_type, trai assert len(result) == 10 for record in result: assert record.label["score"] is not None + + +def test_factorization_machines_serverless_inference( + sagemaker_session, cpu_instance_type, training_set +): + job_name = unique_name_from_base("fm-serverless") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + fm = FactorizationMachines( + role="SageMakerRole", + instance_count=1, + instance_type=cpu_instance_type, + num_factors=10, + predictor_type="regressor", + epochs=2, + clip_gradient=1e2, + eps=0.001, + rescale_grad=1.0 / 100, + sagemaker_session=sagemaker_session, + ) + + # training labels must be 'float32' + fm.fit( + fm.record_set(training_set[0][:200], training_set[1][:200].astype("float32")), + job_name=job_name, + ) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + model = FactorizationMachinesModel( + fm.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session + ) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name + ) + result = predictor.predict(training_set[0][:10]) + + assert len(result) == 10 + for record in result: + assert record.label["score"] is not None diff --git a/tests/integ/test_ipinsights.py b/tests/integ/test_ipinsights.py index 941aac2ee6..9b3b772680 100644 --- a/tests/integ/test_ipinsights.py +++ b/tests/integ/test_ipinsights.py @@ -16,6 +16,7 @@ from sagemaker import IPInsights, IPInsightsModel from sagemaker.predictor import Predictor +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.record_set import prepare_record_set_from_local_files @@ -60,3 +61,44 @@ def test_ipinsights(sagemaker_session, cpu_instance_type): assert len(result["predictions"]) == 1 assert 0 > result["predictions"][0]["dot_product"] > -1 # We expect ~ -0.22 + + +def test_ipinsights_serverless_inference(sagemaker_session, cpu_instance_type): + job_name = unique_name_from_base("ipinsights-serverless") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + data_path = os.path.join(DATA_DIR, "ipinsights") + data_filename = "train.csv" + + with open(os.path.join(data_path, data_filename), "rb") as f: + num_records = len(f.readlines()) + + ipinsights = IPInsights( + role="SageMakerRole", + instance_count=1, + instance_type=cpu_instance_type, + num_entity_vectors=10, + vector_dim=100, + 
sagemaker_session=sagemaker_session, + ) + + record_set = prepare_record_set_from_local_files( + data_path, ipinsights.data_location, num_records, FEATURE_DIM, sagemaker_session + ) + ipinsights.fit(records=record_set, job_name=job_name) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + model = IPInsightsModel( + ipinsights.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session + ) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(memory_size_in_mb=6144), + endpoint_name=job_name, + ) + assert isinstance(predictor, Predictor) + + predict_input = [["user_1", "1.1.1.1"]] + result = predictor.predict(predict_input) + + assert len(result["predictions"]) == 1 + assert 0 > result["predictions"][0]["dot_product"] > -1 # We expect ~ -0.22 diff --git a/tests/integ/test_kmeans.py b/tests/integ/test_kmeans.py index 056b068f3b..a48a98564d 100644 --- a/tests/integ/test_kmeans.py +++ b/tests/integ/test_kmeans.py @@ -18,6 +18,7 @@ import pytest from sagemaker import KMeans, KMeansModel +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import datasets, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -134,3 +135,59 @@ def test_async_kmeans(sagemaker_session, cpu_instance_type, training_set): for record in result: assert record.label["closest_cluster"] is not None assert record.label["distance_to_cluster"] is not None + + +def test_kmeans_serverless_inference(sagemaker_session, cpu_instance_type, training_set): + job_name = unique_name_from_base("kmeans-serverless") + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + kmeans = KMeans( + role="SageMakerRole", + instance_count=1, + instance_type=cpu_instance_type, + k=10, + sagemaker_session=sagemaker_session, + ) + + kmeans.init_method = "random" + kmeans.max_iterations = 1 + kmeans.tol = 1 + kmeans.num_trials = 1 + kmeans.local_init_method = "kmeans++" + kmeans.half_life_time_size = 1 + kmeans.epochs = 1 + kmeans.center_factor = 1 + kmeans.eval_metrics = ["ssd", "msd"] + + assert kmeans.hyperparameters() == dict( + init_method=kmeans.init_method, + local_lloyd_max_iter=str(kmeans.max_iterations), + local_lloyd_tol=str(kmeans.tol), + local_lloyd_num_trials=str(kmeans.num_trials), + local_lloyd_init_method=kmeans.local_init_method, + half_life_time_size=str(kmeans.half_life_time_size), + epochs=str(kmeans.epochs), + extra_center_factor=str(kmeans.center_factor), + k=str(kmeans.k), + eval_metrics=json.dumps(kmeans.eval_metrics), + force_dense="True", + ) + + kmeans.fit(kmeans.record_set(training_set[0][:100]), job_name=job_name) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + model = KMeansModel( + kmeans.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session + ) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name + ) + result = predictor.predict(training_set[0][:10]) + + assert len(result) == 10 + for record in result: + assert record.label["closest_cluster"] is not None + assert record.label["distance_to_cluster"] is not None + predictor.delete_model() + with pytest.raises(Exception) as exception: + sagemaker_session.sagemaker_client.describe_model(ModelName=model.name) + assert "Could not find model" in str(exception.value) diff --git a/tests/integ/test_knn.py b/tests/integ/test_knn.py index 9f44cc34a1..4121dd1e80 100644 --- 
a/tests/integ/test_knn.py +++ b/tests/integ/test_knn.py @@ -17,6 +17,7 @@ import pytest from sagemaker import KNN, KNNModel +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import datasets, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -95,3 +96,35 @@ def test_async_knn_classifier(sagemaker_session, cpu_instance_type, training_set assert len(result) == 10 for record in result: assert record.label["score"] is not None + + +def test_knn_regressor_serverless_inference(sagemaker_session, cpu_instance_type, training_set): + job_name = unique_name_from_base("knn-serverless") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + knn = KNN( + role="SageMakerRole", + instance_count=1, + instance_type=cpu_instance_type, + k=10, + predictor_type="regressor", + sample_size=500, + sagemaker_session=sagemaker_session, + ) + + # training labels must be 'float32' + knn.fit( + knn.record_set(training_set[0][:200], training_set[1][:200].astype("float32")), + job_name=job_name, + ) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + model = KNNModel(knn.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name + ) + result = predictor.predict(training_set[0][:10]) + + assert len(result) == 10 + for record in result: + assert record.label["score"] is not None diff --git a/tests/integ/test_lda.py b/tests/integ/test_lda.py index 9dd039931e..4a80a0d8eb 100644 --- a/tests/integ/test_lda.py +++ b/tests/integ/test_lda.py @@ -20,6 +20,7 @@ import tests.integ from sagemaker import LDA, LDAModel from sagemaker.amazon.common import read_records +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -66,3 +67,47 @@ def test_lda(sagemaker_session, cpu_instance_type): assert len(result) == 1 for record in result: assert record.label["topic_mixture"] is not None + + +@pytest.mark.slow_test +@pytest.mark.skipif( + tests.integ.test_region() in tests.integ.NO_LDA_REGIONS, + reason="LDA image is not supported in certain regions", +) +def test_lda_serverless_inference(sagemaker_session, cpu_instance_type): + job_name = unique_name_from_base("lda-serverless") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + data_path = os.path.join(DATA_DIR, "lda") + data_filename = "nips-train_1.pbr" + + with open(os.path.join(data_path, data_filename), "rb") as f: + all_records = read_records(f) + + # all records must be same + feature_num = int(all_records[0].features["values"].float32_tensor.shape[0]) + + lda = LDA( + role="SageMakerRole", + instance_type=cpu_instance_type, + num_topics=10, + sagemaker_session=sagemaker_session, + ) + + record_set = prepare_record_set_from_local_files( + data_path, lda.data_location, len(all_records), feature_num, sagemaker_session + ) + lda.fit(records=record_set, mini_batch_size=100, job_name=job_name) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + model = LDAModel(lda.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name + ) + + predict_input = 
np.random.rand(1, feature_num) + result = predictor.predict(predict_input) + + assert len(result) == 1 + for record in result: + assert record.label["topic_mixture"] is not None
diff --git a/tests/integ/test_linear_learner.py b/tests/integ/test_linear_learner.py index ab555cbe02..a5a7274453 100644 --- a/tests/integ/test_linear_learner.py +++ b/tests/integ/test_linear_learner.py @@ -17,6 +17,7 @@ import pytest from sagemaker import LinearLearner, LinearLearnerModel +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base @@ -192,3 +192,70 @@ def test_async_linear_learner(sagemaker_session, cpu_instance_type, training_set for record in result: assert record.label["predicted_label"] is not None assert record.label["score"] is not None + + +def test_linear_learner_serverless_inference(sagemaker_session, cpu_instance_type, training_set): + job_name = unique_name_from_base("linear-learner-serverless") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + training_set[1][:100] = 1 + training_set[1][100:200] = 0 + training_set = training_set[0], training_set[1].astype(np.dtype("float32")) + + ll = LinearLearner( + "SageMakerRole", + 1, + cpu_instance_type, + predictor_type="binary_classifier", + sagemaker_session=sagemaker_session, + ) + ll.binary_classifier_model_selection_criteria = "accuracy" + ll.target_recall = 0.5 + ll.target_precision = 0.5 + ll.positive_example_weight_mult = 0.1 + ll.epochs = 1 + ll.use_bias = True + ll.num_models = 1 + ll.num_calibration_samples = 1 + ll.init_method = "uniform" + ll.init_scale = 0.5 + ll.init_sigma = 0.2 + ll.init_bias = 5 + ll.optimizer = "adam" + ll.loss = "logistic" + ll.wd = 0.5 + ll.l1 = 0.5 + ll.momentum = 0.5 + ll.learning_rate = 0.1 + ll.beta_1 = 0.1 + ll.beta_2 = 0.1 + ll.use_lr_scheduler = True + ll.lr_scheduler_step = 2 + ll.lr_scheduler_factor = 0.5 + ll.lr_scheduler_minimum_lr = 0.1 + ll.normalize_data = False + ll.normalize_label = False + ll.unbias_data = True + ll.unbias_label = False + ll.num_point_for_scaler = 10000 + ll.margin = 1.0 + ll.quantile = 0.5 + ll.loss_insensitivity = 0.1 + ll.huber_delta = 0.1 + ll.early_stopping_tolerance = 0.0001 + ll.early_stopping_patience = 3 + ll.fit(ll.record_set(training_set[0][:200], training_set[1][:200]), job_name=job_name) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + model = LinearLearnerModel( + ll.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session + ) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name + ) + + result = predictor.predict(training_set[0][0:100]) + assert len(result) == 100 + for record in result: + assert record.label["predicted_label"] is not None + assert record.label["score"] is not None
diff --git a/tests/integ/test_mxnet.py b/tests/integ/test_mxnet.py index d13108d471..dac9221745 100644 --- a/tests/integ/test_mxnet.py +++ b/tests/integ/test_mxnet.py @@ -23,6 +23,7 @@ from sagemaker.mxnet.estimator import MXNet from sagemaker.mxnet.model import MXNetModel from sagemaker.mxnet.processing import MXNetProcessor +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import sagemaker_timestamp from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.kms_utils import get_or_create_kms_key @@ -419,6 +420,45 @@ def test_deploy_model_with_accelerator( assert result is not None +def test_deploy_model_with_serverless_inference_config( + mxnet_training_job, + sagemaker_session, + mxnet_inference_latest_version, + mxnet_inference_latest_py_version, +): + endpoint_name = "test-mxnet-deploy-model-serverless-{}".format(sagemaker_timestamp()) + + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): + desc = sagemaker_session.sagemaker_client.describe_training_job(
TrainingJobName=mxnet_training_job + ) + model_data = desc["ModelArtifacts"]["S3ModelArtifacts"] + script_path = os.path.join(DATA_DIR, "mxnet_mnist", "mnist.py") + model = MXNetModel( + model_data, + "SageMakerRole", + entry_point=script_path, + py_version=mxnet_inference_latest_py_version, + sagemaker_session=sagemaker_session, + framework_version=mxnet_inference_latest_version, + ) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=endpoint_name + ) + + data = numpy.zeros(shape=(1, 1, 28, 28)) + result = predictor.predict(data) + + print("==========Result is===========") + print(result) + assert result is not None + + model.delete_model() + with pytest.raises(Exception) as exception: + sagemaker_session.sagemaker_client.describe_model(ModelName=model.name) + assert "Could not find model" in str(exception.value) + + def test_async_fit( sagemaker_session, mxnet_training_latest_version, diff --git a/tests/integ/test_ntm.py b/tests/integ/test_ntm.py index c4f1f33d0f..f73260433b 100644 --- a/tests/integ/test_ntm.py +++ b/tests/integ/test_ntm.py @@ -17,8 +17,9 @@ import numpy as np import pytest -from sagemaker import NTM, NTMModel +from sagemaker import NTM, NTMModel, Predictor from sagemaker.amazon.common import read_records +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -66,3 +67,37 @@ def test_ntm(sagemaker_session, cpu_instance_type): assert len(result) == 1 for record in result: assert record.label["topic_weights"] is not None + + +def test_ntm_serverless_inference(sagemaker_session, cpu_instance_type): + job_name = unique_name_from_base("ntm-serverless") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + data_path = os.path.join(DATA_DIR, "ntm") + data_filename = "nips-train_1.pbr" + + with open(os.path.join(data_path, data_filename), "rb") as f: + all_records = read_records(f) + + # all records must be same + feature_num = int(all_records[0].features["values"].float32_tensor.shape[0]) + + ntm = NTM( + role="SageMakerRole", + instance_count=1, + instance_type=cpu_instance_type, + num_topics=10, + sagemaker_session=sagemaker_session, + ) + + record_set = prepare_record_set_from_local_files( + data_path, ntm.data_location, len(all_records), feature_num, sagemaker_session + ) + ntm.fit(records=record_set, job_name=job_name) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + model = NTMModel(ntm.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name + ) + assert isinstance(predictor, Predictor) diff --git a/tests/integ/test_object2vec.py b/tests/integ/test_object2vec.py index f56258540a..d1583ff631 100644 --- a/tests/integ/test_object2vec.py +++ b/tests/integ/test_object2vec.py @@ -18,6 +18,7 @@ from sagemaker.predictor import Predictor from sagemaker import Object2Vec, Object2VecModel +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -76,3 +77,45 @@ def test_object2vec(sagemaker_session, cpu_instance_type): assert len(result) == 1 for record in result: assert 
record.label["scores"] is not None + + +def test_object2vec_serverless_inference(sagemaker_session, cpu_instance_type): + job_name = unique_name_from_base("object2vec-serverless") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + data_path = os.path.join(DATA_DIR, "object2vec") + data_filename = "train.jsonl" + + with open(os.path.join(data_path, data_filename), "r") as f: + num_records = len(f.readlines()) + + object2vec = Object2Vec( + role="SageMakerRole", + instance_count=1, + instance_type=cpu_instance_type, + epochs=3, + enc0_max_seq_len=20, + enc0_vocab_size=45000, + enc_dim=16, + num_classes=3, + negative_sampling_rate=0, + comparator_list="hadamard,concat,abs_diff", + tied_token_embedding_weight=False, + token_embedding_storage_type="dense", + sagemaker_session=sagemaker_session, + ) + + record_set = prepare_record_set_from_local_files( + data_path, object2vec.data_location, num_records, FEATURE_NUM, sagemaker_session + ) + + object2vec.fit(records=record_set, job_name=job_name) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + model = Object2VecModel( + object2vec.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session + ) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name + ) + assert isinstance(predictor, Predictor) diff --git a/tests/integ/test_pca.py b/tests/integ/test_pca.py index 91c061b9ed..cc340fb33b 100644 --- a/tests/integ/test_pca.py +++ b/tests/integ/test_pca.py @@ -17,6 +17,7 @@ import pytest import sagemaker.amazon.pca +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import datasets, TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -101,3 +102,38 @@ def test_async_pca(sagemaker_session, cpu_instance_type, training_set): assert len(result) == 5 for record in result: assert record.label["projection"] is not None + + +def test_pca_serverless_inference(sagemaker_session, cpu_instance_type, training_set): + job_name = unique_name_from_base("pca-serverless") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + pca = sagemaker.amazon.pca.PCA( + role="SageMakerRole", + instance_count=1, + instance_type=cpu_instance_type, + num_components=48, + sagemaker_session=sagemaker_session, + enable_network_isolation=True, + ) + + pca.algorithm_mode = "randomized" + pca.subtract_mean = True + pca.extra_components = 5 + pca.fit(pca.record_set(training_set[0][:100]), job_name=job_name) + + with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session): + pca_model = sagemaker.amazon.pca.PCAModel( + model_data=pca.model_data, + role="SageMakerRole", + sagemaker_session=sagemaker_session, + ) + predictor = pca_model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name + ) + + result = predictor.predict(training_set[0][:5]) + + assert len(result) == 5 + for record in result: + assert record.label["projection"] is not None diff --git a/tests/integ/test_pytorch.py b/tests/integ/test_pytorch.py index f9d0adf8a9..5e3f227e58 100644 --- a/tests/integ/test_pytorch.py +++ b/tests/integ/test_pytorch.py @@ -19,6 +19,7 @@ from sagemaker.pytorch.estimator import PyTorch from sagemaker.pytorch.model import PyTorchModel from sagemaker.pytorch.processing import PyTorchProcessor +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import sagemaker_timestamp from tests.integ 
import ( test_region, @@ -264,6 +265,39 @@ def test_deploy_model_with_accelerator( assert output.shape == (batch_size, 10) +def test_deploy_model_with_serverless_inference_config( + pytorch_training_job, + sagemaker_session, + cpu_instance_type, + pytorch_inference_latest_version, + pytorch_inference_latest_py_version, +): + endpoint_name = "test-pytorch-deploy-model-serverless-{}".format(sagemaker_timestamp()) + + with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session): + desc = sagemaker_session.sagemaker_client.describe_training_job( + TrainingJobName=pytorch_training_job + ) + model_data = desc["ModelArtifacts"]["S3ModelArtifacts"] + model = PyTorchModel( + model_data, + "SageMakerRole", + entry_point=MNIST_SCRIPT, + framework_version=pytorch_inference_latest_version, + py_version=pytorch_inference_latest_py_version, + sagemaker_session=sagemaker_session, + ) + predictor = model.deploy( + serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=endpoint_name + ) + + batch_size = 100 + data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32) + output = predictor.predict(data) + + assert output.shape == (batch_size, 10) + + def _upload_training_data(pytorch): return pytorch.sagemaker_session.upload_data( path=os.path.join(MNIST_DIR, "training"), diff --git a/tests/integ/test_randomcutforest.py b/tests/integ/test_randomcutforest.py index 36621b5c5a..baa2fed5fc 100644 --- a/tests/integ/test_randomcutforest.py +++ b/tests/integ/test_randomcutforest.py @@ -15,6 +15,7 @@ import numpy as np from sagemaker import RandomCutForest, RandomCutForestModel +from sagemaker.serverless import ServerlessInferenceConfig from sagemaker.utils import unique_name_from_base from tests.integ import TRAINING_DEFAULT_TIMEOUT_MINUTES from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name @@ -53,3 +54,40 @@ def test_randomcutforest(sagemaker_session, cpu_instance_type): for record in result: assert record.label["score"] is not None assert len(record.label["score"].float32_tensor.values) == 1 + + +def test_randomcutforest_serverless_inference(sagemaker_session, cpu_instance_type): + job_name = unique_name_from_base("randomcutforest") + + with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES): + # Generate a thousand 14-dimensional datapoints. 
+        feature_num = 14
+        train_input = np.random.rand(1000, feature_num)
+
+        rcf = RandomCutForest(
+            role="SageMakerRole",
+            instance_count=1,
+            instance_type=cpu_instance_type,
+            num_trees=50,
+            num_samples_per_tree=20,
+            eval_metrics=["accuracy", "precision_recall_fscore"],
+            sagemaker_session=sagemaker_session,
+        )
+
+        rcf.fit(records=rcf.record_set(train_input), job_name=job_name)
+
+    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
+        model = RandomCutForestModel(
+            rcf.model_data, role="SageMakerRole", sagemaker_session=sagemaker_session
+        )
+        predictor = model.deploy(
+            serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=job_name
+        )
+
+        predict_input = np.random.rand(1, feature_num)
+        result = predictor.predict(predict_input)
+
+        assert len(result) == 1
+        for record in result:
+            assert record.label["score"] is not None
+            assert len(record.label["score"].float32_tensor.values) == 1
diff --git a/tests/integ/test_sklearn.py b/tests/integ/test_sklearn.py
index 964e1a22fa..ad05acdb75 100644
--- a/tests/integ/test_sklearn.py
+++ b/tests/integ/test_sklearn.py
@@ -18,6 +18,7 @@
 import pytest
 import numpy
 
+from sagemaker.serverless import ServerlessInferenceConfig
 from sagemaker.sklearn import SKLearn, SKLearnModel, SKLearnProcessor
 from sagemaker.utils import sagemaker_timestamp, unique_name_from_base
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
@@ -155,6 +156,32 @@ def test_deploy_model(
     _predict_and_assert(predictor)
 
 
+def test_deploy_model_with_serverless_inference_config(
+    sklearn_training_job,
+    sagemaker_session,
+    sklearn_latest_version,
+    sklearn_latest_py_version,
+):
+    endpoint_name = "test-sklearn-deploy-model-serverless-{}".format(sagemaker_timestamp())
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        desc = sagemaker_session.sagemaker_client.describe_training_job(
+            TrainingJobName=sklearn_training_job
+        )
+        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
+        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
+        model = SKLearnModel(
+            model_data,
+            ROLE,
+            entry_point=script_path,
+            framework_version=sklearn_latest_version,
+            sagemaker_session=sagemaker_session,
+        )
+        predictor = model.deploy(
+            serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=endpoint_name
+        )
+        _predict_and_assert(predictor)
+
+
 @pytest.mark.skip(
     reason="This test has always failed, but the failure was masked by a bug. "
     "This test should be fixed. Details in https://github.com/aws/sagemaker-python-sdk/pull/968"
@@ -277,11 +304,3 @@ def _predict_and_assert(predictor):
     data = numpy.zeros((batch_size, 784), dtype="float32")
     output = predictor.predict(data)
     assert len(output) == batch_size
-
-    data = numpy.zeros((batch_size, 1, 28, 28), dtype="float32")
-    output = predictor.predict(data)
-    assert len(output) == batch_size
-
-    data = numpy.zeros((batch_size, 28, 28), dtype="float32")
-    output = predictor.predict(data)
-    assert len(output) == batch_size
diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py
index bb865a2dd5..30b9940e5d 100644
--- a/tests/integ/test_tf.py
+++ b/tests/integ/test_tf.py
@@ -18,7 +18,8 @@
 import pytest
 
-from sagemaker.tensorflow import TensorFlow, TensorFlowProcessor
+from sagemaker.serverless import ServerlessInferenceConfig
+from sagemaker.tensorflow import TensorFlow, TensorFlowProcessor, TensorFlowModel
 from sagemaker.utils import unique_name_from_base, sagemaker_timestamp
 
 import tests.integ
@@ -293,6 +294,39 @@ def test_deploy_with_input_handlers(
     assert expected_result == result
 
 
+def test_model_deploy_with_serverless_inference_config(
+    sagemaker_session, tf_full_version, tf_full_py_version
+):
+    endpoint_name = unique_name_from_base("sagemaker-tensorflow-serverless")
+    model_data = sagemaker_session.upload_data(
+        path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"),
+        key_prefix="tensorflow-serving/models",
+    )
+    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
+        endpoint_name=endpoint_name,
+        sagemaker_session=sagemaker_session,
+        hours=2,
+        sleep_between_cleanup_attempts=20,
+        exponential_sleep=True,
+    ):
+        model = TensorFlowModel(
+            model_data=model_data,
+            role=ROLE,
+            framework_version=tf_full_version,
+            sagemaker_session=sagemaker_session,
+        )
+        predictor = model.deploy(
+            serverless_inference_config=ServerlessInferenceConfig(),
+            endpoint_name=endpoint_name,
+        )
+
+        input_data = {"instances": [1.0, 2.0, 5.0]}
+        expected_result = {"predictions": [3.5, 4.0, 5.5]}
+
+        result = predictor.predict(input_data)
+        assert expected_result == result
+
+
 def _assert_tags_match(sagemaker_client, resource_arn, tags, retry_count=15):
     # endpoint and training tags might take minutes to propagate.
     for _ in retries(retry_count, "Getting endpoint tags", seconds_to_sleep=30):
diff --git a/tests/integ/test_xgboost.py b/tests/integ/test_xgboost.py
index 088a55d7f3..733ab4665a 100644
--- a/tests/integ/test_xgboost.py
+++ b/tests/integ/test_xgboost.py
@@ -14,15 +14,32 @@
 import os
 
 import pytest
+
+from sagemaker.serverless import ServerlessInferenceConfig
 from sagemaker.utils import unique_name_from_base
-from sagemaker.xgboost import XGBoost
+from sagemaker.xgboost import XGBoost, XGBoostModel
 from sagemaker.xgboost.processing import XGBoostProcessor
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
-from tests.integ.timeout import timeout
+from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
 
 ROLE = "SageMakerRole"
 
+
+@pytest.fixture(scope="module")
+def xgboost_training_job(
+    sagemaker_session,
+    xgboost_latest_version,
+    xgboost_latest_py_version,
+    cpu_instance_type,
+):
+    return _run_abalone_training_job(
+        sagemaker_session,
+        cpu_instance_type,
+        xgboost_latest_version,
+        xgboost_latest_py_version,
+    )
+
+
 @pytest.mark.release
 def test_framework_processing_job_with_deps(
     sagemaker_session,
@@ -82,3 +99,56 @@ def test_training_with_network_isolation(
     assert sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=job_name)[
         "EnableNetworkIsolation"
     ]
+
+
+def test_xgboost_serverless_inference(
+    xgboost_training_job,
+    sagemaker_session,
+    xgboost_latest_version,
+):
+    endpoint_name = unique_name_from_base("test-xgboost-deploy-model-serverless")
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        desc = sagemaker_session.sagemaker_client.describe_training_job(
+            TrainingJobName=xgboost_training_job
+        )
+        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
+
+        xgboost = XGBoostModel(
+            sagemaker_session=sagemaker_session,
+            model_data=model_data,
+            role=ROLE,
+            entry_point=os.path.join(DATA_DIR, "xgboost_abalone", "abalone.py"),
+            framework_version=xgboost_latest_version,
+        )
+
+        xgboost.deploy(
+            serverless_inference_config=ServerlessInferenceConfig(), endpoint_name=endpoint_name
+        )
+
+
+def _run_abalone_training_job(
+    sagemaker_session, cpu_instance_type, xgboost_latest_version, xgboost_latest_py_version
+):
+    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
+        base_job_name = "test-xgboost-abalone"
+
+        xgboost = XGBoost(
+            entry_point=os.path.join(DATA_DIR, "xgboost_abalone", "abalone.py"),
+            role=ROLE,
+            instance_type=cpu_instance_type,
+            instance_count=1,
+            framework_version=xgboost_latest_version,
+            py_version=xgboost_latest_py_version,
+            base_job_name=base_job_name,
+            sagemaker_session=sagemaker_session,
+            enable_network_isolation=True,
+        )
+
+        train_input = xgboost.sagemaker_session.upload_data(
+            path=os.path.join(DATA_DIR, "xgboost_abalone", "abalone"),
+            key_prefix="integ-test-data/xgboost_abalone/abalone",
+        )
+        job_name = unique_name_from_base(base_job_name)
+        xgboost.fit(inputs={"train": train_input}, job_name=job_name)
+
+        return xgboost.latest_training_job.name
diff --git a/tests/unit/sagemaker/model/test_deploy.py b/tests/unit/sagemaker/model/test_deploy.py
index 33366578de..e919b42c60 100644
--- a/tests/unit/sagemaker/model/test_deploy.py
+++ b/tests/unit/sagemaker/model/test_deploy.py
@@ -62,7 +62,9 @@ def test_deploy(name_from_base, prepare_container_def, production_variant, sagem
     name_from_base.assert_called_with(MODEL_IMAGE)
     assert 2 == name_from_base.call_count
 
-    prepare_container_def.assert_called_with(INSTANCE_TYPE, accelerator_type=None)
+    prepare_container_def.assert_called_with(
+        INSTANCE_TYPE, accelerator_type=None, serverless_inference_config=None
+    )
     production_variant.assert_called_with(
         MODEL_NAME,
         INSTANCE_TYPE,
@@ -106,7 +108,7 @@ def test_deploy_accelerator_type(
         accelerator_type=ACCELERATOR_TYPE,
     )
 
-    create_sagemaker_model.assert_called_with(INSTANCE_TYPE, ACCELERATOR_TYPE, None)
+    create_sagemaker_model.assert_called_with(INSTANCE_TYPE, ACCELERATOR_TYPE, None, None)
     production_variant.assert_called_with(
         MODEL_NAME,
         INSTANCE_TYPE,
@@ -212,7 +214,7 @@ def test_deploy_tags(create_sagemaker_model, production_variant, name_from_base,
     tags = [{"Key": "ModelName", "Value": "TestModel"}]
     model.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT, tags=tags)
 
-    create_sagemaker_model.assert_called_with(INSTANCE_TYPE, None, tags)
+    create_sagemaker_model.assert_called_with(INSTANCE_TYPE, None, tags, None)
     sagemaker_session.endpoint_from_production_variants.assert_called_with(
         name=ENDPOINT_NAME,
         production_variants=[BASE_PRODUCTION_VARIANT],
@@ -349,7 +351,7 @@ def test_deploy_serverless_inference(production_variant, create_sagemaker_model,
         serverless_inference_config=serverless_inference_config,
    )
 
-    create_sagemaker_model.assert_called_with(None, None, None)
+    create_sagemaker_model.assert_called_with(None, None, None, serverless_inference_config)
     production_variant.assert_called_with(
         MODEL_NAME,
         None,
diff --git a/tests/unit/sagemaker/model/test_model.py b/tests/unit/sagemaker/model/test_model.py
index 8befff7c77..b66fda908d 100644
--- a/tests/unit/sagemaker/model/test_model.py
+++ b/tests/unit/sagemaker/model/test_model.py
@@ -140,7 +140,9 @@ def test_create_sagemaker_model(prepare_container_def, sagemaker_session):
     model = Model(MODEL_DATA, MODEL_IMAGE, name=MODEL_NAME, sagemaker_session=sagemaker_session)
     model._create_sagemaker_model()
 
-    prepare_container_def.assert_called_with(None, accelerator_type=None)
+    prepare_container_def.assert_called_with(
+        None, accelerator_type=None, serverless_inference_config=None
+    )
     sagemaker_session.create_model.assert_called_with(
         MODEL_NAME, None, container_def, vpc_config=None, enable_network_isolation=False, tags=None
     )
@@ -151,7 +153,9 @@ def test_create_sagemaker_model_instance_type(prepare_container_def, sagemaker_s
     model = Model(MODEL_DATA, MODEL_IMAGE, name=MODEL_NAME, sagemaker_session=sagemaker_session)
     model._create_sagemaker_model(INSTANCE_TYPE)
 
-    prepare_container_def.assert_called_with(INSTANCE_TYPE, accelerator_type=None)
+    prepare_container_def.assert_called_with(
+        INSTANCE_TYPE, accelerator_type=None, serverless_inference_config=None
+    )
 
 
 @patch("sagemaker.model.Model.prepare_container_def")
@@ -161,7 +165,9 @@ def test_create_sagemaker_model_accelerator_type(prepare_container_def, sagemake
     accelerator_type = "ml.eia.medium"
     model._create_sagemaker_model(INSTANCE_TYPE, accelerator_type=accelerator_type)
 
-    prepare_container_def.assert_called_with(INSTANCE_TYPE, accelerator_type=accelerator_type)
+    prepare_container_def.assert_called_with(
+        INSTANCE_TYPE, accelerator_type=accelerator_type, serverless_inference_config=None
+    )
 
 
 @patch("sagemaker.model.Model.prepare_container_def")
diff --git a/tests/unit/sagemaker/tensorflow/test_tfs.py b/tests/unit/sagemaker/tensorflow/test_tfs.py
index 351b3fbb11..322f2e4379 100644
--- a/tests/unit/sagemaker/tensorflow/test_tfs.py
+++ b/tests/unit/sagemaker/tensorflow/test_tfs.py
@@ -86,6 +86,7 @@ def test_tfs_model(retrieve_image_uri, sagemaker_session, tensorflow_inference_v
         instance_type=INSTANCE_TYPE,
         accelerator_type=None,
         image_scope="inference",
+        serverless_inference_config=None,
     )
     assert IMAGE == cdef["Image"]
     assert {} == cdef["Environment"]
@@ -110,6 +111,7 @@ def test_tfs_model_accelerator(retrieve_image_uri, sagemaker_session, tensorflow
         instance_type=INSTANCE_TYPE,
         accelerator_type=ACCELERATOR_TYPE,
         image_scope="inference",
+        serverless_inference_config=None,
     )
     assert IMAGE == cdef["Image"]
diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py
index 35e022816b..222d61b3d0 100644
--- a/tests/unit/test_estimator.py
+++ b/tests/unit/test_estimator.py
@@ -189,7 +189,9 @@ def __init__(self, sagemaker_session, entry_point=None, role=ROLE, **kwargs):
     def create_predictor(self, endpoint_name):
         return None
 
-    def prepare_container_def(self, instance_type, accelerator_type=None):
+    def prepare_container_def(
+        self, instance_type, accelerator_type=None, serverless_inference_config=None
+    ):
         return MODEL_CONTAINER_DEF
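A note for reviewers, summarizing the change the tests above exercise: ``prepare_container_def()`` and ``_create_sagemaker_model()`` now accept a ``serverless_inference_config`` argument, so a framework model can resolve its inference image URI even when no instance type is supplied. The sketch below is a minimal illustration of that flow and is not part of the diff; the bucket, role, endpoint name, and version strings are placeholder values, and the memory/concurrency settings simply show the non-default knobs ``ServerlessInferenceConfig`` exposes.

.. code:: python

    from sagemaker.serverless import ServerlessInferenceConfig
    from sagemaker.sklearn import SKLearnModel

    # Placeholder artifacts -- substitute your own bucket, role, and script.
    model = SKLearnModel(
        model_data="s3://my-bucket/sklearn/model.tar.gz",
        role="SageMakerRole",
        entry_point="inference.py",
        framework_version="1.0-1",
    )

    # No instance type is passed anywhere: with a ServerlessInferenceConfig,
    # image URI lookup falls back to the serverless config, which is what the
    # prepare_container_def() changes in this diff enable.
    serverless_config = ServerlessInferenceConfig(
        memory_size_in_mb=4096,  # default: 2048
        max_concurrency=10,  # default: 5
    )

    predictor = model.deploy(
        serverless_inference_config=serverless_config,
        endpoint_name="my-sklearn-serverless-endpoint",
    )

Invoking the resulting predictor is identical to invoking a real-time endpoint, which is why the integration tests above reuse their existing predict-and-assert helpers unchanged.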