@@ -233,7 +233,7 @@ def __init__(
         role: str,
         djl_version: Optional[str] = None,
         task: Optional[str] = None,
-        data_type: str = "fp32",
+        dtype: str = "fp32",
         number_of_partitions: Optional[int] = None,
         min_workers: Optional[int] = None,
         max_workers: Optional[int] = None,
@@ -264,7 +264,7 @@ def __init__(
             task (str): The HuggingFace/NLP task you want to launch this model for. Defaults to
                 None.
                 If not provided, the task will be inferred from the model architecture by DJL.
-            data_type (str): The data type to use for loading your model. Accepted values are
+            dtype (str): The data type to use for loading your model. Accepted values are
                 "fp32", "fp16", "bf16", "int8". Defaults to "fp32".
             number_of_partitions (int): The number of GPUs to partition the model across. The
                 partitioning strategy is determined by the selected backend. If DeepSpeed is
@@ -322,13 +322,20 @@ def __init__(
                 "You only need to set model_id and ensure it points to uncompressed model "
                 "artifacts in s3, or a valid HuggingFace Hub model_id."
             )
+        data_type = kwargs.pop("data_type", None)
+        if data_type:
+            logger.warning(
+                "data_type is being deprecated in favor of dtype. Please migrate use of data_type"
+                " to dtype. Support for data_type will be removed in a future release"
+            )
+        dtype = dtype or data_type
         super(DJLModel, self).__init__(
             None, image_uri, role, entry_point, predictor_cls=predictor_cls, **kwargs
         )
         self.model_id = model_id
         self.djl_version = djl_version
         self.task = task
-        self.data_type = data_type
+        self.dtype = dtype
         self.number_of_partitions = number_of_partitions
         self.min_workers = min_workers
         self.max_workers = max_workers
@@ -372,7 +379,7 @@ def transformer(self, **_):
             "DJLModels do not currently support Batch Transform inference jobs"
         )
 
-    def right_size(self, checkpoint_data_type: str):
+    def right_size(self, **_):
         """Not implemented.
 
         DJLModels do not support SageMaker Inference Recommendation Jobs.
@@ -573,8 +580,8 @@ def generate_serving_properties(self, serving_properties=None) -> Dict[str, str]
             serving_properties["option.entryPoint"] = self.entry_point
         if self.task:
             serving_properties["option.task"] = self.task
-        if self.data_type:
-            serving_properties["option.dtype"] = self.data_type
+        if self.dtype:
+            serving_properties["option.dtype"] = self.dtype
         if self.min_workers:
             serving_properties["minWorkers"] = self.min_workers
         if self.max_workers:
@@ -779,7 +786,7 @@ def __init__(
                 None.
             load_in_8bit (bool): Whether to load the model in int8 precision using bits and bytes
                 quantization. This is only supported for select model architectures.
-                Defaults to False. If ``data_type`` is int8, then this is set to True.
+                Defaults to False. If ``dtype`` is int8, then this is set to True.
             low_cpu_mem_usage (bool): Whether to limit CPU memory usage to 1x model size during
                 model loading. This is an experimental feature in HuggingFace. This is useful when
                 loading multiple instances of your model in parallel. Defaults to False.
@@ -832,19 +839,19 @@ def generate_serving_properties(self, serving_properties=None) -> Dict[str, str]
         if self.device_map:
             serving_properties["option.device_map"] = self.device_map
         if self.load_in_8bit:
-            if self.data_type != "int8":
-                raise ValueError("Set data_type='int8' to use load_in_8bit")
+            if self.dtype != "int8":
+                raise ValueError("Set dtype='int8' to use load_in_8bit")
             serving_properties["option.load_in_8bit"] = self.load_in_8bit
-        if self.data_type == "int8":
+        if self.dtype == "int8":
             serving_properties["option.load_in_8bit"] = True
         if self.low_cpu_mem_usage:
             serving_properties["option.low_cpu_mem_usage"] = self.low_cpu_mem_usage
         # This is a workaround due to a bug in our built in handler for huggingface
         # TODO: This needs to be fixed when new dlc is published
         if (
             serving_properties["option.entryPoint"] == "djl_python.huggingface"
-            and self.data_type
-            and self.data_type != "auto"
+            and self.dtype
+            and self.dtype != "auto"
         ):
             serving_properties["option.dtype"] = "auto"
             serving_properties.pop("option.load_in_8bit", None)
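For reference, below is a minimal standalone sketch of the backwards-compatibility pattern the __init__ hunk above introduces: the deprecated keyword is drained from **kwargs before being forwarded to parent constructors, a warning is logged, and the old value is honored only as a fallback. The DemoModel class is a hypothetical stand-in, not part of the SageMaker SDK; note that the new keyword defaults to None here so the fallback can actually take effect.

import logging

logger = logging.getLogger(__name__)


class DemoModel:
    """Hypothetical stand-in illustrating a data_type -> dtype deprecation shim."""

    def __init__(self, dtype=None, **kwargs):
        # Drain the deprecated keyword so it is not forwarded downstream via **kwargs.
        data_type = kwargs.pop("data_type", None)
        if data_type:
            logger.warning(
                "data_type is being deprecated in favor of dtype. Please migrate use of"
                " data_type to dtype. Support for data_type will be removed in a future release"
            )
        # Prefer the new keyword; fall back to the deprecated one only when dtype is unset.
        self.dtype = dtype or data_type


# Old callers keep working (with a warning); the new keyword wins on conflict:
assert DemoModel(data_type="fp16").dtype == "fp16"
assert DemoModel(dtype="bf16", data_type="fp16").dtype == "bf16"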