feature: Support remote schema retrieval for task types (question-answering, fill-mask) and add e2e tests for both local and remote HF schema logic. (#4572)

samruds · ShailavTaneja · web-flow · commit a72dc6d1142a · 2024-04-10T18:34:06.000-07:00
* feature: Add and use sagemaker_schema_inference_artifacts dependency for huggingface in schema builder (question-answering only)

* feature: Switch to remote schema for hf tasks question-answering and fill-mask with appropriate e2e integ tests.

* Format fixes

* Fix pylint

* Format fixes

* Remove speech recognition serializer fixes

* Test fixes

---------

Co-authored-by: Shailav Taneja <none>
Co-authored-by: Shailav Taneja <shailav@amazon.com>
diff --git a/requirements/extras/huggingface_requirements.txt b/requirements/extras/huggingface_requirements.txt
@@ -1 +1,2 @@
 accelerate>=0.24.1,<=0.27.0
+sagemaker_schema_inference_artifacts>=0.0.5
diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py
@@ -637,7 +637,7 @@ def build(  # pylint: disable=R0911
                 if model_task is None:
                     model_task = hf_model_md.get("pipeline_tag")
                 if self.schema_builder is None and model_task is not None:
-                    self._schema_builder_init(model_task)
+                    self._hf_schema_builder_init(model_task)
                 if model_task == "text-generation":  # pylint: disable=R1705
                     return self._build_for_tgi()
                 elif self._can_fit_on_single_gpu():
@@ -704,8 +704,8 @@ def validate(self, model_dir: str) -> Type[bool]:
 
         return get_metadata(model_dir)
 
-    def _schema_builder_init(self, model_task: str):
-        """Initialize the schema builder
+    def _hf_schema_builder_init(self, model_task: str):
+        """Initialize the schema builder for the given HF_TASK
 
         Args:
             model_task (str): Required, the task name
@@ -714,10 +714,29 @@ def _schema_builder_init(self, model_task: str):
             TaskNotFoundException: If the I/O schema for the given task is not found.
         """
         try:
-            sample_inputs, sample_outputs = task.retrieve_local_schemas(model_task)
+            try:
+                sample_inputs, sample_outputs = task.retrieve_local_schemas(model_task)
+            except ValueError:
+                # samples could not be loaded locally, try to fetch remote hf schema
+                from sagemaker_schema_inference_artifacts.huggingface import remote_schema_retriever
+
+                if model_task in ("text-to-image", "automatic-speech-recognition"):
+                    logger.warning(
+                        "HF SchemaBuilder for %s is in beta mode, and is not guaranteed to work "
+                        "with all models at this time.",
+                        model_task,
+                    )
+                remote_hf_schema_helper = remote_schema_retriever.RemoteSchemaRetriever()
+                (
+                    sample_inputs,
+                    sample_outputs,
+                ) = remote_hf_schema_helper.get_resolved_hf_schema_for_task(model_task)
             self.schema_builder = SchemaBuilder(sample_inputs, sample_outputs)
         except ValueError:
-            raise TaskNotFoundException(f"Schema builder for {model_task} could not be found.")
+            raise TaskNotFoundException(
+                f"HuggingFace Schema builder samples for {model_task} could not be found "
+                f"locally or via remote."
+            )
 
     def _can_fit_on_single_gpu(self) -> Type[bool]:
         """Check if model can fit on a single GPU
diff --git a/src/sagemaker/serve/schema/task.json b/src/sagemaker/serve/schema/task.json
@@ -1,38 +1,4 @@
 {
-	"fill-mask": {
-		"sample_inputs": {
-			"properties": {
-				"inputs": "Paris is the [MASK] of France.",
-				"parameters": {}
-			}
-		},
-		"sample_outputs": {
-			"properties": [
-				{
-					"sequence": "Paris is the capital of France.",
-					"score": 0.7
-				}
-			]
-		}
-	},
-	"question-answering": {
-		"sample_inputs": {
-			"properties": {
-				"context": "I have a German Shepherd dog, named Coco.",
-				"question": "What is my dog's breed?"
-			}
-		},
-		"sample_outputs": {
-			"properties": [
-				{
-					"answer": "German Shepherd",
-					"score": 0.972,
-					"start": 9,
-					"end": 24
-				}
-			]
-		}
-	},
 	"text-classification": {
 		"sample_inputs": {
 			"properties": {
diff --git a/tests/integ/sagemaker/serve/test_schema_builder.py b/tests/integ/sagemaker/serve/test_schema_builder.py
@@ -18,6 +18,7 @@
 import pytest
 
 from sagemaker.serve.utils.exceptions import TaskNotFoundException
+from sagemaker_schema_inference_artifacts.huggingface import remote_schema_retriever
 from tests.integ.sagemaker.serve.constants import (
     PYTHON_VERSION_IS_NOT_310,
     SERVE_SAGEMAKER_ENDPOINT_TIMEOUT,
@@ -31,35 +32,73 @@
 logger = logging.getLogger(__name__)
 
 
-def test_model_builder_happy_path_with_only_model_id_fill_mask(sagemaker_session):
-    model_builder = ModelBuilder(model="bert-base-uncased")
+def test_model_builder_happy_path_with_only_model_id_text_generation(sagemaker_session):
+    model_builder = ModelBuilder(model="HuggingFaceH4/zephyr-7b-beta")
 
     model = model_builder.build(sagemaker_session=sagemaker_session)
 
     assert model is not None
     assert model_builder.schema_builder is not None
 
-    inputs, outputs = task.retrieve_local_schemas("fill-mask")
-    assert model_builder.schema_builder.sample_input == inputs
+    inputs, outputs = task.retrieve_local_schemas("text-generation")
+    assert model_builder.schema_builder.sample_input["inputs"] == inputs["inputs"]
     assert model_builder.schema_builder.sample_output == outputs
 
 
+def test_model_builder_negative_path(sagemaker_session):
+    # A model-task combo unsupported by both the local and remote schema fallback options. (eg: text-to-video)
+    model_builder = ModelBuilder(model="ByteDance/AnimateDiff-Lightning")
+    with pytest.raises(
+        TaskNotFoundException,
+        match="Error Message: HuggingFace Schema builder samples for text-to-video could not be found locally or "
+        "via remote.",
+    ):
+        model_builder.build(sagemaker_session=sagemaker_session)
+
+
 @pytest.mark.skipif(
     PYTHON_VERSION_IS_NOT_310,
-    reason="Testing Schema Builder Simplification feature",
+    reason="Testing Schema Builder Simplification feature - Local Schema",
 )
-def test_model_builder_happy_path_with_only_model_id_question_answering(
-    sagemaker_session, gpu_instance_type
+@pytest.mark.parametrize(
+    "model_id, task_provided, instance_type_provided, container_startup_timeout",
+    [
+        (
+            "distilbert/distilbert-base-uncased-finetuned-sst-2-english",
+            "text-classification",
+            "ml.m5.xlarge",
+            None,
+        ),
+        (
+            "cardiffnlp/twitter-roberta-base-sentiment-latest",
+            "text-classification",
+            "ml.m5.xlarge",
+            None,
+        ),
+        ("HuggingFaceH4/zephyr-7b-beta", "text-generation", "ml.g5.2xlarge", 900),
+        ("HuggingFaceH4/zephyr-7b-alpha", "text-generation", "ml.g5.2xlarge", 900),
+    ],
+)
+def test_model_builder_happy_path_with_task_provided_local_schema_mode(
+    model_id, task_provided, sagemaker_session, instance_type_provided, container_startup_timeout
 ):
-    model_builder = ModelBuilder(model="bert-large-uncased-whole-word-masking-finetuned-squad")
+    model_builder = ModelBuilder(
+        model=model_id,
+        model_metadata={"HF_TASK": task_provided},
+        instance_type=instance_type_provided,
+    )
 
     model = model_builder.build(sagemaker_session=sagemaker_session)
 
     assert model is not None
     assert model_builder.schema_builder is not None
 
-    inputs, outputs = task.retrieve_local_schemas("question-answering")
-    assert model_builder.schema_builder.sample_input == inputs
+    inputs, outputs = task.retrieve_local_schemas(task_provided)
+    if task_provided == "text-generation":
+        # ignore 'tokens' and other metadata in this case
+        assert model_builder.schema_builder.sample_input["inputs"] == inputs["inputs"]
+    else:
+        assert model_builder.schema_builder.sample_input == inputs
     assert model_builder.schema_builder.sample_output == outputs
 
     with timeout(minutes=SERVE_SAGEMAKER_ENDPOINT_TIMEOUT):
@@ -69,9 +108,17 @@ def test_model_builder_happy_path_with_only_model_id_question_answering(
             role_arn = iam_client.get_role(RoleName="SageMakerRole")["Role"]["Arn"]
 
             logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...")
-            predictor = model.deploy(
-                role=role_arn, instance_count=1, instance_type=gpu_instance_type
-            )
+            if container_startup_timeout:
+                predictor = model.deploy(
+                    role=role_arn,
+                    instance_count=1,
+                    instance_type=instance_type_provided,
+                    container_startup_health_check_timeout=container_startup_timeout,
+                )
+            else:
+                predictor = model.deploy(
+                    role=role_arn, instance_count=1, instance_type=instance_type_provided
+                )
 
             predicted_outputs = predictor.predict(inputs)
             assert predicted_outputs is not None
@@ -91,38 +138,38 @@ def test_model_builder_happy_path_with_only_model_id_question_answering(
                 ), f"{caught_ex} was thrown when running transformers sagemaker endpoint test"
 
 
-def test_model_builder_negative_path(sagemaker_session):
-    model_builder = ModelBuilder(model="CompVis/stable-diffusion-v1-4")
-
-    with pytest.raises(
-        TaskNotFoundException,
-        match="Error Message: Schema builder for text-to-image could not be found.",
-    ):
-        model_builder.build(sagemaker_session=sagemaker_session)
-
-
 @pytest.mark.skipif(
     PYTHON_VERSION_IS_NOT_310,
-    reason="Testing Schema Builder Simplification feature",
+    reason="Testing Schema Builder Simplification feature - Remote Schema",
 )
 @pytest.mark.parametrize(
-    "model_id, task_provided",
+    "model_id, task_provided, instance_type_provided",
     [
-        ("bert-base-uncased", "fill-mask"),
-        ("bert-large-uncased-whole-word-masking-finetuned-squad", "question-answering"),
+        ("google-bert/bert-base-uncased", "fill-mask", "ml.m5.xlarge"),
+        ("google-bert/bert-base-cased", "fill-mask", "ml.m5.xlarge"),
+        (
+            "google-bert/bert-large-uncased-whole-word-masking-finetuned-squad",
+            "question-answering",
+            "ml.m5.xlarge",
+        ),
+        ("deepset/roberta-base-squad2", "question-answering", "ml.m5.xlarge"),
     ],
 )
-def test_model_builder_happy_path_with_task_provided(
-    model_id, task_provided, sagemaker_session, gpu_instance_type
+def test_model_builder_happy_path_with_task_provided_remote_schema_mode(
+    model_id, task_provided, sagemaker_session, instance_type_provided
 ):
-    model_builder = ModelBuilder(model=model_id, model_metadata={"HF_TASK": task_provided})
-
+    model_builder = ModelBuilder(
+        model=model_id,
+        model_metadata={"HF_TASK": task_provided},
+        instance_type=instance_type_provided,
+    )
     model = model_builder.build(sagemaker_session=sagemaker_session)
 
     assert model is not None
     assert model_builder.schema_builder is not None
 
-    inputs, outputs = task.retrieve_local_schemas(task_provided)
+    remote_hf_schema_helper = remote_schema_retriever.RemoteSchemaRetriever()
+    inputs, outputs = remote_hf_schema_helper.get_resolved_hf_schema_for_task(task_provided)
     assert model_builder.schema_builder.sample_input == inputs
     assert model_builder.schema_builder.sample_output == outputs
 
@@ -134,7 +181,7 @@ def test_model_builder_happy_path_with_task_provided(
 
             logger.info("Deploying and predicting in SAGEMAKER_ENDPOINT mode...")
             predictor = model.deploy(
-                role=role_arn, instance_count=1, instance_type=gpu_instance_type
+                role=role_arn, instance_count=1, instance_type=instance_type_provided
             )
 
             predicted_outputs = predictor.predict(inputs)
@@ -162,6 +209,7 @@ def test_model_builder_negative_path_with_invalid_task(sagemaker_session):
 
     with pytest.raises(
         TaskNotFoundException,
-        match="Error Message: Schema builder for invalid-task could not be found.",
+        match="Error Message: HuggingFace Schema builder samples for invalid-task could not be found locally or "
+        "via remote.",
     ):
         model_builder.build(sagemaker_session=sagemaker_session)
diff --git a/tests/unit/sagemaker/serve/builder/test_model_builder.py b/tests/unit/sagemaker/serve/builder/test_model_builder.py
@@ -1062,7 +1062,7 @@ def test_build_negative_path_when_schema_builder_not_present(
 
         # HF Pipeline Tag
         mock_model_uris_retrieve.side_effect = KeyError
-        mock_llm_utils_json.load.return_value = {"pipeline_tag": "text-to-image"}
+        mock_llm_utils_json.load.return_value = {"pipeline_tag": "unsupported-task"}
         mock_llm_utils_urllib.request.Request.side_effect = Mock()
 
         # HF Model config
@@ -1075,7 +1075,8 @@ def test_build_negative_path_when_schema_builder_not_present(
 
         self.assertRaisesRegex(
             TaskNotFoundException,
-            "Error Message: Schema builder for text-to-image could not be found.",
+            "Error Message: HuggingFace Schema builder samples for unsupported-task could not be found locally or via "
+            "remote.",
             lambda: model_builder.build(sagemaker_session=mock_session),
         )
 
@@ -1627,7 +1628,8 @@ def test_build_task_override_with_invalid_task_provided(
 
             self.assertRaisesRegex(
                 TaskNotFoundException,
-                f"Error Message: Schema builder for {provided_task} could not be found.",
+                f"Error Message: HuggingFace Schema builder samples for {provided_task} could not be found locally or "
+                f"via remote.",
                 lambda: model_builder.build(sagemaker_session=mock_session),
             )
 
diff --git a/tests/unit/sagemaker/serve/utils/test_task.py b/tests/unit/sagemaker/serve/utils/test_task.py
@@ -18,23 +18,29 @@
 
 from sagemaker.serve.utils import task
 
-EXPECTED_INPUTS = {"inputs": "Paris is the [MASK] of France.", "parameters": {}}
-EXPECTED_OUTPUTS = [{"sequence": "Paris is the capital of France.", "score": 0.7}]
 HF_INVALID_TASK = "not-present-task"
 
 
-def test_retrieve_local_schemas_success():
-    inputs, outputs = task.retrieve_local_schemas("fill-mask")
+def test_retrieve_local_schemas_text_generation_success():
+    inputs, outputs = task.retrieve_local_schemas("text-generation")
 
-    assert inputs == EXPECTED_INPUTS
-    assert outputs == EXPECTED_OUTPUTS
+    assert inputs == {"inputs": "Hello, I'm a language model", "parameters": {}}
+    assert outputs == [
+        {
+            "generated_text": "Hello, I'm a language modeler. So while writing this, when I went out to "
+            "meet my wife or come home she told me that my"
+        }
+    ]
 
 
-def test_retrieve_local_schemas_text_generation_success():
-    inputs, outputs = task.retrieve_local_schemas("text-generation")
+def test_retrieve_local_schemas_text_classification_success():
+    inputs, outputs = task.retrieve_local_schemas("text-classification")
 
-    assert inputs is not None
-    assert outputs is not None
+    assert inputs == {
+        "inputs": "Where is the capital of France?, Paris is the capital of France.",
+        "parameters": {},
+    }
+    assert outputs == [{"label": "entailment", "score": 0.997}]
 
 
 def test_retrieve_local_schemas_throws():

Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`	`1`	`accelerate>=0.24.1,<=0.27.0`
	`2`	`+sagemaker_schema_inference_artifacts>=0.0.5`