Skip to content

Fix: Updated js mb compression logic - ModelBuilder #4294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/sagemaker/serve/builder/jumpstart_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def _is_jumpstart_model_id(self) -> bool:
def _create_pre_trained_js_model(self) -> Type[Model]:
"""Placeholder docstring"""
pysdk_model = JumpStartModel(self.model)
pysdk_model.sagemaker_session = self.sagemaker_session

self._original_deploy = pysdk_model.deploy
pysdk_model.deploy = self._js_builder_deploy_wrapper
Expand Down
5 changes: 4 additions & 1 deletion src/sagemaker/serve/builder/tgi_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ def _create_tgi_model(self) -> Type[Model]:
logger.info("Auto detected %s. Proceeding with the the deployment.", self.image_uri)

pysdk_model = HuggingFaceModel(
image_uri=self.image_uri, env=self.env_vars, role=self.role_arn
image_uri=self.image_uri,
env=self.env_vars,
role=self.role_arn,
sagemaker_session=self.sagemaker_session,
)

self._original_deploy = pysdk_model.deploy
Expand Down
62 changes: 46 additions & 16 deletions src/sagemaker/serve/model_server/djl_serving/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@

from __future__ import absolute_import
import shutil
import tarfile
import subprocess
import json
import tarfile
import logging
from typing import List
from pathlib import Path

from sagemaker.utils import _tmpdir
from sagemaker.s3 import S3Downloader
from sagemaker.djl_inference import DJLModel
from sagemaker.djl_inference.model import _read_existing_serving_properties
from sagemaker.serve.utils.local_hardware import _check_disk_space, _check_docker_disk_usage
Expand All @@ -34,27 +34,57 @@


def _has_serving_properties_file(code_dir: Path) -> bool:
    """Check for an existing serving properties file in the directory.

    Args:
        code_dir: Directory to inspect.

    Returns:
        True when ``code_dir`` contains a regular file named by
        ``_SERVING_PROPERTIES_FILE``, False otherwise.
    """
    return code_dir.joinpath(_SERVING_PROPERTIES_FILE).is_file()


def _members(resources: object, depth: int):
"""Placeholder Docstring"""
for member in resources.getmembers():
member.path = member.path.split("/", depth)[-1]
yield member
def _move_to_code_dir(js_model_dir: str, code_dir: Path):
"""Move DJL Jumpstart resources from model to code_dir"""
js_model_resources = Path(js_model_dir).joinpath("model")
for resource in js_model_resources.glob("*"):
try:
shutil.move(resource, code_dir)
except shutil.Error as e:
if "already exists" in str(e):
continue


def _extract_js_resource(js_model_dir: str, js_id: str):
"""Uncompress the jumpstart resource"""
tmp_sourcedir = Path(js_model_dir).joinpath(f"infer-prepack-{js_id}.tar.gz")
with tarfile.open(str(tmp_sourcedir)) as resources:
resources.extractall(path=js_model_dir)


def _copy_jumpstart_artifacts(model_data: str, js_id: str, code_dir: Path):
"""Placeholder Docstring"""
"""Copy the associated JumpStart Resource into the code directory"""
logger.info("Downloading JumpStart artifacts from S3...")
with _tmpdir(directory=str(code_dir)) as js_model_dir:
subprocess.run(["aws", "s3", "cp", model_data, js_model_dir])

logger.info("Uncompressing JumpStart artifacts for faster loading...")
tmp_sourcedir = Path(js_model_dir).joinpath(f"infer-prepack-{js_id}.tar.gz")
with tarfile.open(str(tmp_sourcedir)) as resources:
resources.extractall(path=code_dir, members=_members(resources, 1))
s3_downloader = S3Downloader()
invalid_model_data_format = False
with _tmpdir(directory=str(code_dir)) as js_model_dir:
if isinstance(model_data, str):
if model_data.endswith(".tar.gz"):
logger.info("Uncompressing JumpStart artifacts for faster loading...")
s3_downloader.download(model_data, js_model_dir)
_extract_js_resource(js_model_dir, js_id)
else:
logger.info("Copying uncompressed JumpStart artifacts...")
s3_downloader.download(model_data, js_model_dir)
elif (
isinstance(model_data, dict)
and model_data.get("S3DataSource")
and model_data.get("S3DataSource").get("S3Uri")
):
logger.info("Copying uncompressed JumpStart artifacts...")
s3_downloader.download(model_data.get("S3DataSource").get("S3Uri"), js_model_dir)
else:
invalid_model_data_format = True
if not invalid_model_data_format:
_move_to_code_dir(js_model_dir, code_dir)

if invalid_model_data_format:
raise ValueError("JumpStart model data compression format is unsupported: %s", model_data)

existing_properties = _read_existing_serving_properties(code_dir)
config_json_file = code_dir.joinpath("config.json")
Expand All @@ -70,7 +100,7 @@ def _copy_jumpstart_artifacts(model_data: str, js_id: str, code_dir: Path):
def _generate_properties_file(
model: DJLModel, code_dir: Path, overwrite_props_from_file: bool, manual_set_props: dict
):
"""Placeholder Docstring"""
"""Construct serving properties file taking into account of overrides or manual specs"""
if _has_serving_properties_file(code_dir):
existing_properties = _read_existing_serving_properties(code_dir)
else:
Expand Down
53 changes: 41 additions & 12 deletions src/sagemaker/serve/model_server/tgi/prepare.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,66 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Prepare TgiModel for Deployment"""

from __future__ import absolute_import
import tarfile
import subprocess
import logging
from typing import List
from pathlib import Path

from sagemaker.serve.utils.local_hardware import _check_disk_space, _check_docker_disk_usage
from sagemaker.utils import _tmpdir
from sagemaker.s3 import S3Downloader

logger = logging.getLogger(__name__)


def _extract_js_resource(js_model_dir: str, code_dir: Path, js_id: str):
"""Uncompress the jumpstart resource"""
tmp_sourcedir = Path(js_model_dir).joinpath(f"infer-prepack-{js_id}.tar.gz")
with tarfile.open(str(tmp_sourcedir)) as resources:
resources.extractall(path=code_dir)


def _copy_jumpstart_artifacts(model_data: str, js_id: str, code_dir: Path) -> bool:
    """Copy the associated JumpStart resource into the code directory.

    Args:
        model_data: Either an S3 URI string, or a dict carrying the URI under
            ``["S3DataSource"]["S3Uri"]``. A string ending in ``.tar.gz`` is
            downloaded to a temporary directory and extracted into ``code_dir``;
            any other string, and any dict URI, is downloaded straight into
            ``code_dir``.
        js_id: JumpStart model id; used to locate the downloaded archive, which
            is expected to be named ``infer-prepack-<js_id>.tar.gz``.
        code_dir: Directory that receives the artifacts.

    Returns:
        True once the artifacts have been downloaded (and extracted, for an
        archive).

    Raises:
        ValueError: If ``model_data`` is neither a string nor a dict with a
            non-empty ``S3DataSource.S3Uri`` entry.
    """
    logger.info("Downloading JumpStart artifacts from S3...")

    s3_downloader = S3Downloader()
    if isinstance(model_data, str):
        if model_data.endswith(".tar.gz"):
            logger.info("Uncompressing JumpStart artifacts for faster loading...")
            # The archive only needs to exist long enough to be extracted, so it
            # is staged in a temporary directory under code_dir.
            with _tmpdir(directory=str(code_dir)) as js_model_dir:
                s3_downloader.download(model_data, js_model_dir)
                _extract_js_resource(js_model_dir, code_dir, js_id)
        else:
            logger.info("Copying uncompressed JumpStart artifacts...")
            s3_downloader.download(model_data, code_dir)
        return True

    # Resolve the nested URI once instead of repeating the lookups.
    s3_uri = None
    if isinstance(model_data, dict) and model_data.get("S3DataSource"):
        s3_uri = model_data.get("S3DataSource").get("S3Uri")

    if not s3_uri:
        # Exceptions do not interpolate logging-style "%s" arguments, so the
        # message is formatted explicitly.
        raise ValueError(
            f"JumpStart model data compression format is unsupported: {model_data}"
        )

    logger.info("Copying uncompressed JumpStart artifacts...")
    s3_downloader.download(s3_uri, code_dir)
    return True


def _create_dir_structure(model_path: str) -> tuple:
"""Placeholder Docstring"""
"""Create the expected model directory structure for the TGI server"""
model_path = Path(model_path)
if not model_path.exists():
model_path.mkdir(parents=True)
Expand Down
9 changes: 9 additions & 0 deletions tests/unit/sagemaker/serve/builder/test_djl_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,12 @@ def test_build_deploy_for_djl_local_container(
mode=Mode.LOCAL_CONTAINER,
model_server=ModelServer.DJL_SERVING,
)

builder._prepare_for_mode = MagicMock()
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True

assert isinstance(model, HuggingFaceAccelerateModel)
assert (
Expand Down Expand Up @@ -176,6 +178,7 @@ def test_build_for_djl_local_container_faster_transformer(
model_server=ModelServer.DJL_SERVING,
)
model = builder.build()
builder.serve_settings.telemetry_opt_out = True

assert isinstance(model, FasterTransformerModel)
assert (
Expand Down Expand Up @@ -211,6 +214,7 @@ def test_build_for_djl_local_container_deepspeed(
model_server=ModelServer.DJL_SERVING,
)
model = builder.build()
builder.serve_settings.telemetry_opt_out = True

assert isinstance(model, DeepSpeedModel)
assert model.generate_serving_properties() == mock_expected_deepspeed_serving_properties
Expand Down Expand Up @@ -268,6 +272,7 @@ def test_tune_for_djl_local_container(
builder._djl_model_builder_deploy_wrapper = MagicMock()

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert tuned_model.generate_serving_properties() == mock_most_performant_serving_properties

Expand Down Expand Up @@ -317,6 +322,7 @@ def test_tune_for_djl_local_container_deep_ping_ex(
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert (
tuned_model.generate_serving_properties()
Expand Down Expand Up @@ -369,6 +375,7 @@ def test_tune_for_djl_local_container_load_ex(
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert (
tuned_model.generate_serving_properties()
Expand Down Expand Up @@ -421,6 +428,7 @@ def test_tune_for_djl_local_container_oom_ex(
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert (
tuned_model.generate_serving_properties()
Expand Down Expand Up @@ -473,6 +481,7 @@ def test_tune_for_djl_local_container_invoke_ex(
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert (
tuned_model.generate_serving_properties()
Expand Down
33 changes: 33 additions & 0 deletions tests/unit/sagemaker/serve/model_server/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

# Local filesystem locations used by the model-server prepare tests.
MOCK_MODEL_PATH = "/path/to/mock/model/dir"
MOCK_CODE_DIR = "/path/to/mock/model/dir/code"

# JumpStart model id and temporary directory name used by the tests.
MOCK_JUMPSTART_ID = "mock_llm_js_id"
MOCK_TMP_DIR = "tmp123456"

# Model data given as a plain S3 URI string: a .tar.gz archive and a prefix.
MOCK_COMPRESSED_MODEL_DATA_STR = (
    "s3://jumpstart-cache/to/infer-prepack-huggingface-llm-falcon-7b-bf16.tar.gz"
)
MOCK_UNCOMPRESSED_MODEL_DATA_STR = "s3://jumpstart-cache/to/artifacts/inference-prepack/v1.0.1/"

# Model data given as a dict with the URI nested under S3DataSource.S3Uri.
MOCK_UNCOMPRESSED_MODEL_DATA_STR_FOR_DICT = (
    "s3://jumpstart-cache/to/artifacts/inference-prepack/v1.0.1/dict/"
)
MOCK_UNCOMPRESSED_MODEL_DATA_DICT = dict(
    S3DataSource=dict(
        S3Uri=MOCK_UNCOMPRESSED_MODEL_DATA_STR_FOR_DICT,
        S3DataType="S3Prefix",
        CompressionType="None",
    )
)

# Model data dict that carries no S3DataSource entry at all.
MOCK_INVALID_MODEL_DATA_DICT = dict()
Loading