diff --git a/src/sagemaker/djl_inference/model.py b/src/sagemaker/djl_inference/model.py index b9828e7037..b91851576e 100644 --- a/src/sagemaker/djl_inference/model.py +++ b/src/sagemaker/djl_inference/model.py @@ -854,11 +854,13 @@ def generate_serving_properties(self, serving_properties=None) -> Dict[str, str] if self.low_cpu_mem_usage: serving_properties["option.low_cpu_mem_usage"] = self.low_cpu_mem_usage # This is a workaround due to a bug in our built in handler for huggingface - # TODO: This needs to be fixed when new dlc is published + # TODO: Remove this logic whenever 0.20.0 image is out of service if ( serving_properties["option.entryPoint"] == "djl_python.huggingface" and self.dtype and self.dtype != "auto" + and self.djl_version + and int(self.djl_version.split(".")[1]) < 21 ): serving_properties["option.dtype"] = "auto" serving_properties.pop("option.load_in_8bit", None) diff --git a/tests/unit/test_djl_inference.py b/tests/unit/test_djl_inference.py index 93a1fba336..06adea8e76 100644 --- a/tests/unit/test_djl_inference.py +++ b/tests/unit/test_djl_inference.py @@ -454,7 +454,7 @@ def test_generate_serving_properties_with_valid_configurations( "option.entryPoint": "djl_python.huggingface", "option.s3url": VALID_UNCOMPRESSED_MODEL_DATA, "option.tensor_parallel_degree": 1, - "option.dtype": "auto", + "option.dtype": "fp32", "option.device_id": 4, "option.device_map": "balanced", }