Commit 38a3a2d

change: Change data_type argument to dtype to keep consistent with D… (#3832)
1 parent 1a29c6c commit 38a3a2d

File tree

3 files changed: +30 -23 lines changed

doc/frameworks/djl/using_djl.rst
src/sagemaker/djl_inference/model.py
tests/unit/test_djl_inference.py

doc/frameworks/djl/using_djl.rst (5 additions, 5 deletions)

@@ -31,7 +31,7 @@ You can either deploy your model using DeepSpeed or HuggingFace Accelerate, or l
     djl_model = DJLModel(
         "s3://my_bucket/my_saved_model_artifacts/", # This can also be a HuggingFace Hub model id
         "my_sagemaker_role",
-        data_type="fp16",
+        dtype="fp16",
         task="text-generation",
         number_of_partitions=2 # number of gpus to partition the model across
     )
@@ -48,7 +48,7 @@ If you want to use a specific backend, then you can create an instance of the co
     deepspeed_model = DeepSpeedModel(
         "s3://my_bucket/my_saved_model_artifacts/", # This can also be a HuggingFace Hub model id
         "my_sagemaker_role",
-        data_type="bf16",
+        dtype="bf16",
         task="text-generation",
         tensor_parallel_degree=2, # number of gpus to partition the model across using tensor parallelism
     )
@@ -58,7 +58,7 @@ If you want to use a specific backend, then you can create an instance of the co
     hf_accelerate_model = HuggingFaceAccelerateModel(
         "s3://my_bucket/my_saved_model_artifacts/", # This can also be a HuggingFace Hub model id
         "my_sagemaker_role",
-        data_type="fp16",
+        dtype="fp16",
         task="text-generation",
         number_of_partitions=2, # number of gpus to partition the model across
     )
@@ -109,7 +109,7 @@ For example, you can deploy the EleutherAI gpt-j-6B model like this:
     model = DJLModel(
         "EleutherAI/gpt-j-6B",
         "my_sagemaker_role",
-        data_type="fp16",
+        dtype="fp16",
         number_of_partitions=2
     )
@@ -142,7 +142,7 @@ You would then pass "s3://my_bucket/gpt-j-6B" as ``model_id`` to the ``DJLModel`
     model = DJLModel(
         "s3://my_bucket/gpt-j-6B",
         "my_sagemaker_role",
-        data_type="fp16",
+        dtype="fp16",
         number_of_partitions=2
     )
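For callers, the migration is a one-word change. A minimal sketch of the old and new spellings side by side, assuming DJLModel is importable from sagemaker.djl_inference and reusing the docs' placeholder bucket and role names (the old keyword is still accepted after this commit, but triggers the deprecation warning added in model.py below):

    from sagemaker.djl_inference import DJLModel

    # New spelling, consistent with DJL Serving's own "option.dtype" property.
    model = DJLModel(
        "s3://my_bucket/my_saved_model_artifacts/",  # or a HuggingFace Hub model id
        "my_sagemaker_role",
        dtype="fp16",
        task="text-generation",
        number_of_partitions=2,  # number of GPUs to partition the model across
    )

    # Old spelling: deprecated, logs a warning, and will be removed in a
    # future release.
    legacy_model = DJLModel(
        "s3://my_bucket/my_saved_model_artifacts/",
        "my_sagemaker_role",
        data_type="fp16",
        task="text-generation",
        number_of_partitions=2,
    )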

src/sagemaker/djl_inference/model.py (19 additions, 12 deletions)

@@ -233,7 +233,7 @@ def __init__(
         role: str,
         djl_version: Optional[str] = None,
         task: Optional[str] = None,
-        data_type: str = "fp32",
+        dtype: str = "fp32",
         number_of_partitions: Optional[int] = None,
         min_workers: Optional[int] = None,
         max_workers: Optional[int] = None,
@@ -264,7 +264,7 @@ def __init__(
             task (str): The HuggingFace/NLP task you want to launch this model for. Defaults to
                 None.
                 If not provided, the task will be inferred from the model architecture by DJL.
-            data_type (str): The data type to use for loading your model. Accepted values are
+            dtype (str): The data type to use for loading your model. Accepted values are
                 "fp32", "fp16", "bf16", "int8". Defaults to "fp32".
             number_of_partitions (int): The number of GPUs to partition the model across. The
                 partitioning strategy is determined by the selected backend. If DeepSpeed is
@@ -322,13 +322,20 @@ def __init__(
                 "You only need to set model_id and ensure it points to uncompressed model "
                 "artifacts in s3, or a valid HuggingFace Hub model_id."
             )
+        data_type = kwargs.pop("data_type", None)
+        if data_type:
+            logger.warning(
+                "data_type is being deprecated in favor of dtype. Please migrate use of data_type"
+                " to dtype. Support for data_type will be removed in a future release"
+            )
+        dtype = dtype or data_type
         super(DJLModel, self).__init__(
             None, image_uri, role, entry_point, predictor_cls=predictor_cls, **kwargs
         )
         self.model_id = model_id
         self.djl_version = djl_version
         self.task = task
-        self.data_type = data_type
+        self.dtype = dtype
         self.number_of_partitions = number_of_partitions
         self.min_workers = min_workers
         self.max_workers = max_workers
@@ -372,7 +379,7 @@ def transformer(self, **_):
             "DJLModels do not currently support Batch Transform inference jobs"
         )
 
-    def right_size(self, checkpoint_data_type: str):
+    def right_size(self, **_):
         """Not implemented.
 
         DJLModels do not support SageMaker Inference Recommendation Jobs.
@@ -573,8 +580,8 @@ def generate_serving_properties(self, serving_properties=None) -> Dict[str, str]
             serving_properties["option.entryPoint"] = self.entry_point
         if self.task:
             serving_properties["option.task"] = self.task
-        if self.data_type:
-            serving_properties["option.dtype"] = self.data_type
+        if self.dtype:
+            serving_properties["option.dtype"] = self.dtype
         if self.min_workers:
             serving_properties["minWorkers"] = self.min_workers
         if self.max_workers:
@@ -779,7 +786,7 @@ def __init__(
                 None.
             load_in_8bit (bool): Whether to load the model in int8 precision using bits and bytes
                 quantization. This is only supported for select model architectures.
-                Defaults to False. If ``data_type`` is int8, then this is set to True.
+                Defaults to False. If ``dtype`` is int8, then this is set to True.
             low_cpu_mem_usage (bool): Whether to limit CPU memory usage to 1x model size during
                 model loading. This is an experimental feature in HuggingFace. This is useful when
                 loading multiple instances of your model in parallel. Defaults to False.
@@ -832,19 +839,19 @@ def generate_serving_properties(self, serving_properties=None) -> Dict[str, str]
         if self.device_map:
             serving_properties["option.device_map"] = self.device_map
         if self.load_in_8bit:
-            if self.data_type != "int8":
-                raise ValueError("Set data_type='int8' to use load_in_8bit")
+            if self.dtype != "int8":
+                raise ValueError("Set dtype='int8' to use load_in_8bit")
             serving_properties["option.load_in_8bit"] = self.load_in_8bit
-        if self.data_type == "int8":
+        if self.dtype == "int8":
             serving_properties["option.load_in_8bit"] = True
         if self.low_cpu_mem_usage:
             serving_properties["option.low_cpu_mem_usage"] = self.low_cpu_mem_usage
         # This is a workaround due to a bug in our built in handler for huggingface
         # TODO: This needs to be fixed when new dlc is published
         if (
             serving_properties["option.entryPoint"] == "djl_python.huggingface"
-            and self.data_type
-            and self.data_type != "auto"
+            and self.dtype
+            and self.dtype != "auto"
         ):
             serving_properties["option.dtype"] = "auto"
             serving_properties.pop("option.load_in_8bit", None)
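The kwargs.pop shim added in __init__ is a reusable pattern for renaming a keyword argument without breaking callers: remove the old name from **kwargs before delegating to the parent constructor, warn, and fall back to the legacy value. A self-contained sketch of the pattern (an illustrative class, not the SDK source):

    import logging
    from typing import Optional

    logger = logging.getLogger(__name__)


    class ExampleModel:
        """Illustrative stand-in for DJLModel's renamed-keyword handling."""

        def __init__(self, dtype: Optional[str] = None, **kwargs):
            # Pop the deprecated spelling so it is never forwarded to a
            # parent constructor that no longer accepts it.
            data_type = kwargs.pop("data_type", None)
            if data_type:
                logger.warning(
                    "data_type is being deprecated in favor of dtype. "
                    "Please migrate use of data_type to dtype."
                )
            # An explicit dtype wins; otherwise fall back to the legacy
            # value, then to the old default.
            self.dtype = dtype or data_type or "fp32"

ExampleModel(data_type="fp16") then yields dtype == "fp16" and logs the warning, while ExampleModel(dtype="bf16") stays silent. One subtlety: the fallback "dtype or data_type" only fires when the new parameter's default is falsy, which is why this sketch defaults dtype to None rather than "fp32".

The int8 handling in generate_serving_properties couples two settings: requesting load_in_8bit without dtype="int8" is an error, while dtype="int8" alone is enough to switch the container to 8-bit loading. A simplified extraction of just those branches, pulled into a standalone function for illustration (in the SDK this logic lives inside HuggingFaceAccelerateModel.generate_serving_properties):

    def int8_serving_properties(dtype: str, load_in_8bit: bool) -> dict:
        """Mirror of the int8 branches in the diff above, nothing more."""
        props = {}
        if load_in_8bit:
            if dtype != "int8":
                raise ValueError("Set dtype='int8' to use load_in_8bit")
            props["option.load_in_8bit"] = load_in_8bit
        if dtype == "int8":
            props["option.load_in_8bit"] = True
        return props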

tests/unit/test_djl_inference.py (6 additions, 6 deletions)

@@ -351,12 +351,12 @@ def test_generate_huggingface_serving_properties_invalid_configurations(
         VALID_UNCOMPRESSED_MODEL_DATA,
         ROLE,
         sagemaker_session=sagemaker_session,
-        data_type="fp16",
+        dtype="fp16",
         load_in_8bit=True,
     )
     with pytest.raises(ValueError) as invalid_config:
         _ = model.generate_serving_properties()
-    assert str(invalid_config.value).startswith("Set data_type='int8' to use load_in_8bit")
+    assert str(invalid_config.value).startswith("Set dtype='int8' to use load_in_8bit")
 
     model = HuggingFaceAccelerateModel(
         VALID_UNCOMPRESSED_MODEL_DATA,
@@ -391,7 +391,7 @@ def test_generate_serving_properties_with_valid_configurations(
         min_workers=1,
         max_workers=3,
         job_queue_size=4,
-        data_type="fp16",
+        dtype="fp16",
         parallel_loading=True,
         model_loading_timeout=120,
         prediction_timeout=4,
@@ -429,7 +429,7 @@ def test_generate_serving_properties_with_valid_configurations(
         sagemaker_session=sagemaker_session,
         tensor_parallel_degree=1,
         task="text-generation",
-        data_type="bf16",
+        dtype="bf16",
         max_tokens=2048,
         low_cpu_mem_usage=True,
         enable_cuda_graph=True,
@@ -459,7 +459,7 @@ def test_generate_serving_properties_with_valid_configurations(
         number_of_partitions=1,
         device_id=4,
         device_map="balanced",
-        data_type="fp32",
+        dtype="fp32",
         low_cpu_mem_usage=False,
     )
     serving_properties = model.generate_serving_properties()
@@ -513,7 +513,7 @@ def test_deploy_model_no_local_code(
         ROLE,
         sagemaker_session=sagemaker_session,
         number_of_partitions=4,
-        data_type="fp16",
+        dtype="fp16",
         container_log_level=logging.DEBUG,
         env=ENV,
     )
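The test updates above are mechanical renames; the deprecation path itself is not exercised by this commit. A hypothetical pytest sketch (not part of this change) of how it could be pinned down with caplog, written against the ExampleModel toy from the earlier sketch rather than DJLModel, which the real tests construct through mocked sagemaker_session fixtures:

    import logging

    def test_deprecated_data_type_maps_to_dtype(caplog):
        # ExampleModel is the illustrative shim class sketched after the
        # model.py diff above; it is not part of the SDK.
        with caplog.at_level(logging.WARNING):
            model = ExampleModel(data_type="fp16")
        assert model.dtype == "fp16"
        assert "data_type is being deprecated" in caplog.text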
