diff --git a/src/sagemaker/djl_inference/model.py b/src/sagemaker/djl_inference/model.py index 0fbc28de13..95254d63b7 100644 --- a/src/sagemaker/djl_inference/model.py +++ b/src/sagemaker/djl_inference/model.py @@ -781,7 +781,7 @@ def serving_image_uri(self, region_name): str: The appropriate image URI based on the given parameters. """ if not self.djl_version: - self.djl_version = "0.23.0" + self.djl_version = "0.24.0" return image_uris.retrieve( self._framework(), diff --git a/src/sagemaker/huggingface/llm_utils.py b/src/sagemaker/huggingface/llm_utils.py index d7b63e19da..aef5e5e585 100644 --- a/src/sagemaker/huggingface/llm_utils.py +++ b/src/sagemaker/huggingface/llm_utils.py @@ -51,6 +51,6 @@ def get_huggingface_llm_image_uri( image_scope="inference", ) if backend == "lmi": - version = version or "0.23.0" + version = version or "0.24.0" return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version) raise ValueError("Unsupported backend: %s" % backend) diff --git a/src/sagemaker/image_uri_config/djl-deepspeed.json b/src/sagemaker/image_uri_config/djl-deepspeed.json index e6b79b4847..b78ffaa3eb 100644 --- a/src/sagemaker/image_uri_config/djl-deepspeed.json +++ b/src/sagemaker/image_uri_config/djl-deepspeed.json @@ -1,6 +1,37 @@ { "scope": ["inference"], "versions": { + "0.24.0": { + "registries": { + "af-south-1": "626614931356", + "il-central-1": "780543022126", + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-northeast-3": "364406365360", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-3": "907027046896", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "eu-south-1": "692866216735", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "djl-inference", + "tag_prefix": "0.24.0-deepspeed0.10.0-cu118" + }, "0.23.0": { "registries": { "af-south-1": "626614931356", diff --git a/src/sagemaker/image_uri_config/djl-fastertransformer.json b/src/sagemaker/image_uri_config/djl-fastertransformer.json index 8a6047a800..01c7335048 100644 --- a/src/sagemaker/image_uri_config/djl-fastertransformer.json +++ b/src/sagemaker/image_uri_config/djl-fastertransformer.json @@ -1,6 +1,37 @@ { "scope": ["inference"], "versions": { + "0.24.0": { + "registries": { + "af-south-1": "626614931356", + "il-central-1": "780543022126", + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-northeast-3": "364406365360", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ap-southeast-3": "907027046896", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "eu-south-1": "692866216735", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "djl-inference", + "tag_prefix": "0.24.0-fastertransformer5.3.0-cu118" + }, "0.23.0": { "registries": { "af-south-1": "626614931356", diff --git a/tests/unit/sagemaker/image_uris/test_djl.py b/tests/unit/sagemaker/image_uris/test_djl.py index a0018adf3a..0153310383 100644 --- a/tests/unit/sagemaker/image_uris/test_djl.py +++ b/tests/unit/sagemaker/image_uris/test_djl.py @@ -42,9 +42,8 @@ "us-west-1": "763104351884", "us-west-2": "763104351884", } -DJL_DEEPSPEED_VERSIONS = ["0.23.0", "0.22.1", "0.21.0", "0.20.0", "0.19.0"] -DJL_FASTERTRANSFORMER_VERSIONS = ["0.23.0", "0.22.1", "0.21.0"] -DJL_NEURONX_VERSIONS = ["0.23.0", "0.22.1"] +DJL_DEEPSPEED_VERSIONS = ["0.24.0", "0.23.0", "0.22.1", "0.21.0", "0.20.0", "0.19.0"] +DJL_FASTERTRANSFORMER_VERSIONS = ["0.24.0", "0.23.0", "0.22.1", "0.21.0"] DJL_NEURONX_VERSIONS = ["0.24.0", "0.23.0", "0.22.1"] DJL_VERSIONS_TO_FRAMEWORK = { "0.19.0": {"djl-deepspeed": "deepspeed0.7.3-cu113"}, @@ -64,6 +63,8 @@ "djl-neuronx": "neuronx-sdk2.12.0", }, "0.24.0": { + "djl-deepspeed": "deepspeed0.10.0-cu118", + "djl-fastertransformer": "fastertransformer5.3.0-cu118", "djl-neuronx": "neuronx-sdk2.14.1", }, } diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py index a358bbb4fb..e32d305030 100644 --- a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py +++ b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py @@ -45,7 +45,7 @@ "us-west-2": "763104351884", } HF_VERSIONS = ["0.6.0", "0.8.2", "0.9.3", "1.0.3", "1.1.0"] -LMI_VERSIONS = ["0.23.0"] +LMI_VERSIONS = ["0.24.0"] HF_VERSIONS_MAPPING = { "0.6.0": "2.0.0-tgi0.6.0-gpu-py39-cu118-ubuntu20.04", "0.8.2": "2.0.0-tgi0.8.2-gpu-py39-cu118-ubuntu20.04", @@ -53,7 +53,7 @@ "1.0.3": "2.0.1-tgi1.0.3-gpu-py39-cu118-ubuntu20.04", "1.1.0": "2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04", } -LMI_VERSIONS_MAPPING = {"0.23.0": "deepspeed0.9.5-cu118"} +LMI_VERSIONS_MAPPING = {"0.24.0": "deepspeed0.10.0-cu118"} @pytest.mark.parametrize("version", HF_VERSIONS) diff --git a/tests/unit/test_djl_inference.py b/tests/unit/test_djl_inference.py index e1c56c6c27..863b5ba2b8 100644 --- a/tests/unit/test_djl_inference.py +++ b/tests/unit/test_djl_inference.py @@ -46,7 +46,7 @@ ROLE = "dummy_role" REGION = "us-west-2" BUCKET = "mybucket" -IMAGE_URI = "763104351884.dkr.ecr.us-west-2.amazon.com/djl-inference:0.23.0-deepspeed0.9.5-cu118" +IMAGE_URI = "763104351884.dkr.ecr.us-west-2.amazon.com/djl-inference:0.24.0-deepspeed0.10.0-cu118" GPU_INSTANCE = "ml.g5.12xlarge"