Skip to content

Commit e14cdad

Browse files
committed
[fix] Update dtype logic for huggingface backend for new containers
1 parent 1ae7ce9 commit e14cdad

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

src/sagemaker/djl_inference/model.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -854,11 +854,13 @@ def generate_serving_properties(self, serving_properties=None) -> Dict[str, str]
854854
if self.low_cpu_mem_usage:
855855
serving_properties["option.low_cpu_mem_usage"] = self.low_cpu_mem_usage
856856
# This is a workaround due to a bug in our built in handler for huggingface
857-
# TODO: This needs to be fixed when new dlc is published
857+
# TODO: Remove this logic whenever 0.20.0 image is out of service
858858
if (
859859
serving_properties["option.entryPoint"] == "djl_python.huggingface"
860860
and self.dtype
861861
and self.dtype != "auto"
862+
and self.djl_version
863+
and int(self.djl_version.split(".")[1]) < 21
862864
):
863865
serving_properties["option.dtype"] = "auto"
864866
serving_properties.pop("option.load_in_8bit", None)

tests/unit/test_djl_inference.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -454,7 +454,7 @@ def test_generate_serving_properties_with_valid_configurations(
454454
"option.entryPoint": "djl_python.huggingface",
455455
"option.s3url": VALID_UNCOMPRESSED_MODEL_DATA,
456456
"option.tensor_parallel_degree": 1,
457-
"option.dtype": "auto",
457+
"option.dtype": "fp32",
458458
"option.device_id": 4,
459459
"option.device_map": "balanced",
460460
}

0 commit comments

Comments
 (0)