Skip to content

Feat: Pull latest tei container for sentence similarity models on HuggingFace hub #4686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
2e00238
Update: Pull latest tei container for sentence similarity models
samruds May 15, 2024
43ce1ba
Fix formatting
samruds May 15, 2024
6211227
Address PR comments
samruds May 15, 2024
0441436
Fix formatting
samruds May 15, 2024
4973f8f
Fix check
samruds May 16, 2024
f8cd864
Switch sentence similarity to be deployed on tgi
samruds May 16, 2024
a5fa0e9
Fix formatting
samruds May 16, 2024
e524134
Fix formatting
samruds May 16, 2024
4263a44
Fix formatting
samruds May 16, 2024
eb3b6d3
Fix formatting
samruds May 16, 2024
2b9ba2a
Introduce TEI builder with TGI server
samruds May 16, 2024
33d5b04
Fix formatting
samruds May 16, 2024
20687f0
Add integ test
samruds May 16, 2024
d85425f
Fix formatting
samruds May 16, 2024
bbdff4c
Add integ test
samruds May 16, 2024
a526416
Add integ test
samruds May 16, 2024
1e49f88
Add integ test
samruds May 16, 2024
af78426
Add integ test
samruds May 16, 2024
a5e665a
Add integ test
samruds May 16, 2024
e58f622
Fix formatting
samruds May 16, 2024
4c336dd
Merge branch 'master' into master
samruds May 16, 2024
ea900bf
Move to G5 for integ test
samruds May 16, 2024
cffe46a
Fix formatting
samruds May 16, 2024
48205ad
Integ test updates
samruds May 17, 2024
312d837
Integ test updates
samruds May 17, 2024
29ea1c5
Integ test updates
samruds May 17, 2024
f6f8116
Fix formatting
samruds May 17, 2024
166e570
Integ test updates
samruds May 17, 2024
4bb5522
Move back to generate for ping
samruds May 17, 2024
17645f7
Integ test updates
samruds May 17, 2024
e8341c2
Integ test updates
samruds May 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions src/sagemaker/serve/builder/transformers_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
_get_nb_instance,
)
from sagemaker.djl_inference.model import _get_model_config_properties_from_hf
from sagemaker.huggingface import HuggingFaceModel
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
from sagemaker.serve.model_server.multi_model_server.prepare import (
_create_dir_structure,
)
Expand All @@ -47,6 +47,7 @@ class Transformers(ABC):
"""Transformers build logic with ModelBuilder()"""

def __init__(self):
self.model_metadata = None
self.model = None
self.serve_settings = None
self.sagemaker_session = None
Expand Down Expand Up @@ -99,7 +100,27 @@ def _create_transformers_model(self) -> Type[Model]:
if hf_model_md is None:
raise ValueError("Could not fetch HF metadata")

if "pytorch" in hf_model_md.get("tags"):
model_task = None
if self.model_metadata:
model_task = self.model_metadata.get("HF_TASK")
else:
model_task = hf_model_md.get("pipeline_tag")

if model_task == "sentence-similarity" and not self.image_uri:
self.image_uri = get_huggingface_llm_image_uri(
"huggingface-tei", session=self.sagemaker_session
)

logger.info("Auto detected %s. Proceeding with the the deployment.", self.image_uri)

pysdk_model = HuggingFaceModel(
env=self.env_vars,
role=self.role_arn,
sagemaker_session=self.sagemaker_session,
image_uri=self.image_uri,
vpc_config=self.vpc_config,
)
elif "pytorch" in hf_model_md.get("tags"):
self.pytorch_version = self._get_supported_version(
hf_config, base_hf_version, "pytorch"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def test_build_deploy_for_transformers_local_container_and_remote_container(
return_value="ml.g5.24xlarge",
)
@patch("sagemaker.serve.builder.transformers_builder._capture_telemetry", side_effect=None)
def test_image_uri(
def test_image_uri_override(
self,
mock_get_nb_instance,
mock_telemetry,
Expand Down Expand Up @@ -144,3 +144,51 @@ def test_image_uri(

with self.assertRaises(ValueError) as _:
model.deploy(mode=Mode.IN_PROCESS)

@patch(
    "sagemaker.serve.builder.transformers_builder._get_nb_instance",
    return_value="ml.g5.24xlarge",
)
# NOTE(review): the builder calls ``.get(...)`` on the HF metadata it fetches,
# but this mock returns a plain string -- confirm whether a dict such as
# {"pipeline_tag": "sentence-similarity"} is what the builder expects here.
@patch(
    "sagemaker.huggingface.llm_utils.get_huggingface_model_metadata",
    return_value="sentence-similarity",
)
# NOTE(review): transformers_builder imports ``get_huggingface_llm_image_uri``
# by name from ``sagemaker.huggingface``; patching the attribute on the source
# package may not replace the reference the builder already holds -- confirm
# the patch target.
@patch(
    "sagemaker.huggingface.get_huggingface_llm_image_uri", return_value=MOCK_IMAGE_CONFIG
)
@patch("sagemaker.serve.builder.transformers_builder._capture_telemetry", side_effect=None)
def test_sentence_similarity_support(
    self,
    mock_telemetry,
    mock_image,
    mock_task,
    mock_get_nb_instance,
):
    """Build and deploy a model with the HF task and image-URI lookups patched.

    Stacked ``@patch`` decorators are applied bottom-up, so the mock created by
    the decorator closest to ``def`` is passed first. The parameters are named
    in that order: telemetry, image URI, HF metadata, notebook instance.

    The test builds in ``LOCAL_CONTAINER`` mode, deploys locally, then deploys
    to a SageMaker endpoint with the underlying deploy mocked out, and finally
    checks that ``IN_PROCESS`` mode is rejected with ``ValueError``.
    """
    builder = ModelBuilder(
        model=mock_model_id,
        schema_builder=mock_schema_builder,
        mode=Mode.LOCAL_CONTAINER,
    )

    # Stub out mode preparation so that build() performs no real setup work.
    builder._prepare_for_mode = MagicMock()
    builder._prepare_for_mode.side_effect = None

    model = builder.build()
    builder.serve_settings.telemetry_opt_out = True

    # Replace the local-container mode handler so deploy() starts nothing real.
    builder.modes[str(Mode.LOCAL_CONTAINER)] = MagicMock()
    predictor = model.deploy(model_data_download_timeout=1800)

    assert builder.image_uri == MOCK_IMAGE_CONFIG
    assert builder.env_vars["MODEL_LOADING_TIMEOUT"] == "1800"
    assert isinstance(predictor, TransformersLocalModePredictor)

    assert builder.nb_instance_type == "ml.g5.24xlarge"

    # Switch to a remote endpoint deployment with the real deploy call mocked.
    builder._original_deploy = MagicMock()
    builder._prepare_for_mode.return_value = (None, {})
    predictor = model.deploy(mode=Mode.SAGEMAKER_ENDPOINT, role="mock_role_arn")
    assert "HF_MODEL_ID" in model.env

    with self.assertRaises(ValueError):
        model.deploy(mode=Mode.IN_PROCESS)
Loading