fix: refactor endpoint type enums, comments, docstrings, method names… (#1406)

bhaoz · knikure · commit 9cfab6f15e2f · 2023-11-27T19:09:15.000Z
diff --git a/src/sagemaker/base_predictor.py b/src/sagemaker/base_predictor.py
@@ -399,9 +399,9 @@ def update_endpoint(
         new_endpoint_config_name = name_from_base(current_endpoint_config_name)
 
         if self._get_component_name():
-            endpoint_type = EndpointType.GEN2
+            endpoint_type = EndpointType.INFERENCE_COMPONENT_BASED
         else:
-            endpoint_type = EndpointType.GEN1
+            endpoint_type = EndpointType.MODEL_BASED
 
         self.sagemaker_session.create_endpoint_config_from_existing(
             current_endpoint_config_name,
@@ -442,8 +442,8 @@ def delete_endpoint(self, delete_endpoint_config=True):
     def delete_predictor(self, wait: bool = False) -> None:
         """Delete the Amazon SageMaker inference component or endpoint backing this predictor.
 
-        Delete the corresponding inference component if the endpoint is a Generation2
-        endpoint.
+        Delete the corresponding inference component if the endpoint is an inference component
+        based endpoint.
         Otherwise delete the endpoint where this predictor is hosted.
         """
 
@@ -485,8 +485,9 @@ def update_predictor(
                 https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests
                 (Default: None).
             resources (Optional[ResourceRequirements]): The compute resource requirements
-                for a model to be deployed to an endpoint. Only EndpointType.GEN2 supports
-                this feature. (Default: None).
+                for a model to be deployed to an endpoint.
+                Only EndpointType.INFERENCE_COMPONENT_BASED supports this feature.
+                (Default: None).
         """
         if self.component_name is None:
             raise ValueError(
diff --git a/src/sagemaker/enums.py b/src/sagemaker/enums.py
@@ -24,5 +24,7 @@
 class EndpointType(Enum):
     """Types of endpoint"""
 
-    GEN1 = "gen1"  # Amazon SageMaker Endpoint Generation 1
-    GEN2 = "gen2"  # Amazon SageMaker Endpoint Generation 2
+    MODEL_BASED = "ModelBased"  # Amazon SageMaker Model Based Endpoint
+    INFERENCE_COMPONENT_BASED = (
+        "InferenceComponentBased"  # Amazon SageMaker Inference Component Based Endpoint
+    )
diff --git a/src/sagemaker/jumpstart/factory/model.py b/src/sagemaker/jumpstart/factory/model.py
@@ -561,7 +561,7 @@ def get_deploy_kwargs(
 
     deploy_kwargs = _add_tags_to_kwargs(kwargs=deploy_kwargs)
 
-    if endpoint_type == EndpointType.GEN2:
+    if endpoint_type == EndpointType.INFERENCE_COMPONENT_BASED:
         deploy_kwargs = _add_resources_to_kwargs(kwargs=deploy_kwargs)
         deploy_kwargs.endpoint_type = endpoint_type
         deploy_kwargs.managed_instance_scaling = managed_instance_scaling
diff --git a/src/sagemaker/jumpstart/model.py b/src/sagemaker/jumpstart/model.py
@@ -263,8 +263,9 @@ def __init__(
                 can be just the name if your account owns the Model Package.
                 ``model_data`` is not required. (Default: None).
             resources (Optional[ResourceRequirements]): The compute resource requirements
-                for a model to be deployed to an endpoint. Only EndpointType.GEN2 supports
-                this feature. (Default: None).
+                for a model to be deployed to an endpoint.
+                Only EndpointType.INFERENCE_COMPONENT_BASED supports this feature.
+                (Default: None).
         Raises:
             ValueError: If the model ID is not recognized by JumpStart.
         """
@@ -460,7 +461,7 @@ def deploy(
         endpoint_logging: Optional[bool] = False,
         resources: Optional[ResourceRequirements] = None,
         managed_instance_scaling: Optional[str] = None,
-        endpoint_type: EndpointType = EndpointType.GEN1,
+        endpoint_type: EndpointType = EndpointType.MODEL_BASED,
     ) -> PredictorBase:
         """Creates endpoint by calling base ``Model`` class `deploy` method.
 
@@ -547,13 +548,14 @@ def deploy(
             endpoint_logging (Optiona[bool]): If set to true, live logging will be emitted as
                 the SageMaker Endpoint starts up. (Default: False).
             resources (Optional[ResourceRequirements]): The compute resource requirements
-                for a model to be deployed to an endpoint. Only EndpointType.GEN2 supports
-                this feature. (Default: None).
+                for a model to be deployed to an endpoint. Only
+                EndpointType.INFERENCE_COMPONENT_BASED supports this feature.
+                (Default: None).
             managed_instance_scaling (Optional[Dict]): Managed intance scaling options,
                 if configured Amazon SageMaker will manage the instance number behind the
                 endpoint.
             endpoint_type (EndpointType): The type of endpoint used to deploy models.
-                (Default: EndpointType.GEN1).
+                (Default: EndpointType.MODEL_BASED).
         """
 
         deploy_kwargs = get_deploy_kwargs(
diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py
@@ -312,8 +312,9 @@ def __init__(
                 the SageMaker Python SDK attempts to use either the CodeCommit
                 credential helper or local credential storage for authentication.
             resources (Optional[ResourceRequirements]): The compute resource requirements
-                for a model to be deployed to an endpoint. Only EndpointType.GEN2 supports
-                this feature. (Default: None).
+                for a model to be deployed to an endpoint. Only
+                EndpointType.INFERENCE_COMPONENT_BASED supports this feature.
+                (Default: None).
 
         """
         self.model_data = model_data
@@ -1275,7 +1276,7 @@ def deploy(
         accept_eula: Optional[bool] = None,
         endpoint_logging=False,
         resources: Optional[ResourceRequirements] = None,
-        endpoint_type: EndpointType = EndpointType.GEN1,
+        endpoint_type: EndpointType = EndpointType.MODEL_BASED,
         managed_instance_scaling: Optional[str] = None,
         **kwargs,
     ):
@@ -1367,13 +1368,13 @@ def deploy(
             endpoint_logging (Optiona[bool]): If set to true, live logging will be emitted as
                 the SageMaker Endpoint starts up. (Default: False).
             resources (Optional[ResourceRequirements]): The compute resource requirements
-                for a model to be deployed to an endpoint. Only EndpointType.GEN2 supports
-                this feature. (Default: None).
+                for a model to be deployed to an endpoint. Only
+                EndpointType.INFERENCE_COMPONENT_BASED supports this feature. (Default: None).
             managed_instance_scaling (Optional[Dict]): Managed instance scaling options,
                 if configured Amazon SageMaker will manage the instance number behind the
                 Endpoint. (Default: None).
             endpoint_type (Optional[EndpointType]): The type of an endpoint used to deploy models.
-                (Default: EndpointType.GEN1).
+                (Default: EndpointType.MODEL_BASED).
         Raises:
              ValueError: If arguments combination check failed in these circumstances:
                 - If no role is specified or
@@ -1474,7 +1475,7 @@ def deploy(
                 self._base_name = "-".join((self._base_name, compiled_model_suffix))
 
         # Support multiple models on same endpoint
-        if endpoint_type == EndpointType.GEN2:
+        if endpoint_type == EndpointType.INFERENCE_COMPONENT_BASED:
             if endpoint_name:
                 self.endpoint_name = endpoint_name
             else:
diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py
@@ -4069,7 +4069,7 @@ def create_endpoint_config_from_existing(
         new_data_capture_config_dict=None,
         new_production_variants=None,
         new_explainer_config_dict=None,
-        endpoint_type=EndpointType.GEN1,
+        endpoint_type=EndpointType.MODEL_BASED,
     ):
         """Create an Amazon SageMaker endpoint configuration from an existing one.
 
@@ -4119,7 +4119,7 @@ def create_endpoint_config_from_existing(
         production_variants = (
             new_production_variants or existing_endpoint_config_desc["ProductionVariants"]
         )
-        if endpoint_type == EndpointType.GEN2:
+        if endpoint_type == EndpointType.INFERENCE_COMPONENT_BASED:
             # Make a copy of Production variants and remove the InitialVariantWeight
             # in the copy
             copy_production_variants = deepcopy(production_variants)
@@ -5278,9 +5278,9 @@ def endpoint_from_production_variants(
             sagemaker_config=load_sagemaker_config() if (self is None) else None,
         )
 
-        # For Amazon SageMaker Generation 2 Endpoint, it will not pass Model names
-        # during Endpoint creation. Instead, ExecutionRoleArn will be needed in the
-        # EndpointConfig to create Endpoint
+        # For an Amazon SageMaker inference component based endpoint, model names are
+        # not passed during endpoint creation. Instead, ExecutionRoleArn is needed in
+        # the endpoint config to create the endpoint.
         model_names = [pv["ModelName"] for pv in production_variants if "ModelName" in pv]
         if len(model_names) == 0:
             # Currently, SageMaker Python SDK allow using RoleName to deploy models.
diff --git a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
@@ -67,7 +67,7 @@ def test_non_prepacked_jumpstart_model(setup):
     assert response is not None
 
 
-def test_non_prepacked_jumpstart_model_deployed_on_gen2_endpoint(setup):
+def test_non_prepacked_jumpstart_model_deployed_on_inference_component_based_endpoint(setup):
 
     model_id = "huggingface-llm-falcon-7b-instruct-bf16"  # default g5.2xlarge
 
@@ -77,7 +77,7 @@ def test_non_prepacked_jumpstart_model_deployed_on_gen2_endpoint(setup):
         sagemaker_session=get_sm_session(),
     )
 
-    predictor = model.deploy(endpoint_type=EndpointType.GEN2)
+    predictor = model.deploy(endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED)
 
     inference_input = {
         "inputs": "Girafatron is obsessed with giraffes, the most glorious animal on the "
diff --git a/tests/integ/test_huggingface.py b/tests/integ/test_huggingface.py
@@ -177,9 +177,9 @@ def test_huggingface_inference(
 
 
 @pytest.mark.skip(
-    reason="re-enable when above GEN1 endpoint hugging face inference test enabled",
+    reason="re-enable when above MODEL_BASED endpoint hugging face inference test enabled",
 )
-def test_huggingface_inference_gen2_endpoint(
+def test_huggingface_inference_inference_component_based_endpoint(
     sagemaker_session,
     gpu_pytorch_instance_type,
     huggingface_inference_latest_version,
@@ -204,7 +204,7 @@ def test_huggingface_inference_gen2_endpoint(
         instance_type=gpu_pytorch_instance_type,
         initial_instance_count=1,
         endpoint_name=endpoint_name,
-        endpoint_type=EndpointType.GEN2,
+        endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
         resources=ResourceRequirements(
             requests={
                 "num_accelerators": 1,  # NumberOfCpuCoresRequired
diff --git a/tests/integ/test_inference_component_based_endpoint.py b/tests/integ/test_inference_component_based_endpoint.py
@@ -120,7 +120,7 @@ def test_deploy_single_model_with_endpoint_name(tfs_model, resources):
         1,
         "ml.m5.large",
         endpoint_name=endpoint_name,
-        endpoint_type=EndpointType.GEN2,
+        endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
         resources=resources,
     )
 
@@ -140,10 +140,7 @@ def test_deploy_single_model_with_endpoint_name(tfs_model, resources):
     predictor.delete_endpoint()
 
 
-@pytest.mark.slow_test
-@pytest.mark.skip(
-    reason="Disable until us-west-2 production become stable",
-)
+@pytest.mark.release
 def test_deploy_update_predictor_with_other_model(
     tfs_model,
     resources,
@@ -155,7 +152,7 @@ def test_deploy_update_predictor_with_other_model(
         1,
         "ml.m5.4xlarge",
         endpoint_name=endpoint_name,
-        endpoint_type=EndpointType.GEN2,
+        endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
         resources=resources,
     )
 
@@ -172,7 +169,7 @@ def test_deploy_update_predictor_with_other_model(
         1,
         "ml.m5.4xlarge",
         endpoint_name=endpoint_name,
-        endpoint_type=EndpointType.GEN2,
+        endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
         resources=resources,
     )
     xgboost_predictor.serializer = CSVSerializer()
@@ -208,7 +205,7 @@ def test_deploy_multi_models_without_endpoint_name(tfs_model, resources):
     tfs_predictor1 = tfs_model.deploy(
         1,
         "ml.m5.large",
-        endpoint_type=EndpointType.GEN2,
+        endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
         resources=resources,
     )
 
@@ -221,7 +218,7 @@ def test_deploy_multi_models_without_endpoint_name(tfs_model, resources):
         1,
         "ml.m5.large",
         endpoint_name=endpoint_name,
-        endpoint_type=EndpointType.GEN2,
+        endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
         resources=resources,
     )
 
diff --git a/tests/unit/sagemaker/jumpstart/model/test_model.py b/tests/unit/sagemaker/jumpstart/model/test_model.py
@@ -34,7 +34,7 @@
 from tests.unit.sagemaker.jumpstart.utils import (
     get_special_model_spec,
     overwrite_dictionary,
-    get_special_model_spec_for_gen2_endpoint,
+    get_special_model_spec_for_inference_component_based_endpoint,
 )
 
 execution_role = "fake role! do not use!"
@@ -125,7 +125,7 @@ def test_non_prepacked(
     @mock.patch("sagemaker.jumpstart.model.Model.__init__")
     @mock.patch("sagemaker.jumpstart.model.Model.deploy")
     @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region)
-    def test_non_prepacked_gen2_endpoint(
+    def test_non_prepacked_inference_component_based_endpoint(
         self,
         mock_model_deploy: mock.Mock,
         mock_model_init: mock.Mock,
@@ -141,7 +141,9 @@ def test_non_prepacked_gen2_endpoint(
         mock_is_valid_model_id.return_value = True
         model_id, _ = "js-trainable-model", "*"
 
-        mock_get_model_specs.side_effect = get_special_model_spec_for_gen2_endpoint
+        mock_get_model_specs.side_effect = (
+            get_special_model_spec_for_inference_component_based_endpoint
+        )
 
         mock_session.return_value = sagemaker_session
 
@@ -180,7 +182,7 @@ def test_non_prepacked_gen2_endpoint(
             resources=resource_requirements,
         )
 
-        model.deploy(endpoint_type=EndpointType.GEN2)
+        model.deploy(endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED)
 
         mock_model_deploy.assert_called_once_with(
             initial_instance_count=1,
@@ -193,7 +195,7 @@ def test_non_prepacked_gen2_endpoint(
             ],
             endpoint_logging=False,
             resources=resource_requirements,
-            endpoint_type=EndpointType.GEN2,
+            endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
         )
 
     @mock.patch("sagemaker.utils.sagemaker_timestamp")
@@ -203,7 +205,7 @@ def test_non_prepacked_gen2_endpoint(
     @mock.patch("sagemaker.jumpstart.model.Model.__init__")
     @mock.patch("sagemaker.jumpstart.model.Model.deploy")
     @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region)
-    def test_non_prepacked_gen2_endpoint_no_default_pass_custom_resources(
+    def test_non_prepacked_inference_component_based_endpoint_no_default_pass_custom_resources(
         self,
         mock_model_deploy: mock.Mock,
         mock_model_init: mock.Mock,
@@ -254,7 +256,7 @@ def test_non_prepacked_gen2_endpoint_no_default_pass_custom_resources(
         )
 
         model.deploy(
-            endpoint_type=EndpointType.GEN2,
+            endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
             resources=custom_resource_requirements,
         )
 
@@ -268,7 +270,7 @@ def test_non_prepacked_gen2_endpoint_no_default_pass_custom_resources(
             ],
             endpoint_logging=False,
             resources=custom_resource_requirements,
-            endpoint_type=EndpointType.GEN2,
+            endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
         )
 
     @mock.patch("sagemaker.jumpstart.model.is_valid_model_id")
diff --git a/tests/unit/sagemaker/jumpstart/utils.py b/tests/unit/sagemaker/jumpstart/utils.py
@@ -112,16 +112,16 @@ def get_special_model_spec(
     return specs
 
 
-def get_special_model_spec_for_gen2_endpoint(
+def get_special_model_spec_for_inference_component_based_endpoint(
     region: str = None,
     model_id: str = None,
     version: str = None,
     s3_client: boto3.client = None,
 ) -> JumpStartModelSpecs:
     """This function mocks cache accessor functions. For this mock,
     we only retrieve model specs based on the model ID and adding
-    generation 2 endpoint specific specification. This is reserved
-    for special specs.
+    inference component based endpoint specific specification.
+    This is reserved for special specs.
     """
     model_spec_dict = SPECIAL_MODEL_SPECS_DICT[model_id]
     model_spec_dict["hosting_resource_requirements"] = {
diff --git a/tests/unit/sagemaker/model/test_deploy.py b/tests/unit/sagemaker/model/test_deploy.py
@@ -1009,7 +1009,7 @@ def test_deploy_with_name_and_resources(sagemaker_session):
         MODEL_IMAGE, MODEL_DATA, name=MODEL_NAME, role=ROLE, sagemaker_session=sagemaker_session
     )
 
-    endpoint_name = "Gen2-endpoint-test"
+    endpoint_name = "inference-component-based-endpoint-test"
     model.deploy(
         endpoint_name=endpoint_name,
         instance_type=INSTANCE_TYPE,
diff --git a/tests/unit/sagemaker/model/test_model.py b/tests/unit/sagemaker/model/test_model.py
@@ -858,7 +858,7 @@ def test_script_mode_model_uses_jumpstart_base_name(repack_model, sagemaker_sess
 
 @patch("sagemaker.utils.repack_model")
 @patch("sagemaker.fw_utils.tar_and_upload_dir")
-def test_all_framework_models_generation_two_endpoint_deploy_path(
+def test_all_framework_models_inference_component_based_endpoint_deploy_path(
     repack_model, tar_and_uload_dir, sagemaker_session
 ):
     framework_model_classes_to_kwargs = {
@@ -893,7 +893,7 @@ def test_all_framework_models_generation_two_endpoint_deploy_path(
         ).deploy(
             instance_type="ml.m2.xlarge",
             initial_instance_count=INSTANCE_COUNT,
-            endpoint_type=EndpointType.GEN2,
+            endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
             resources=ResourceRequirements(
                 requests={
                     "num_accelerators": 1,
@@ -904,7 +904,7 @@ def test_all_framework_models_generation_two_endpoint_deploy_path(
             ),
         )
 
-        # Verified Generation2 endpoint and inference component creation
+        # Verified inference component based endpoint and inference component creation
         # path
         sagemaker_session.endpoint_in_service_or_not.assert_called_once()
         sagemaker_session.create_model.assert_called_once()
diff --git a/tests/unit/test_predictor.py b/tests/unit/test_predictor.py

Original file line number	Diff line number	Diff line change
`@@ -1009,7 +1009,7 @@ def test_deploy_with_name_and_resources(sagemaker_session):`
`1009`	`1009`	`MODEL_IMAGE, MODEL_DATA, name=MODEL_NAME, role=ROLE, sagemaker_session=sagemaker_session`
`1010`	`1010`	`)`
`1011`	`1011`
`1012`		`- endpoint_name = "Gen2-endpoint-test"`
	`1012`	`+ endpoint_name = "inference-component-based-endpoint-test"`
`1013`	`1013`	`model.deploy(`
`1014`	`1014`	`endpoint_name=endpoint_name,`
`1015`	`1015`	`instance_type=INSTANCE_TYPE,`