@@ -250,7 +250,7 @@ class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers, TensorflowServing,
         default=None, metadata={"help": "Define sagemaker session for execution"}
     )
     name: Optional[str] = field(
-        default="model-name-" + uuid.uuid1().hex,
+        default_factory=lambda: "model-name-" + uuid.uuid1().hex,
         metadata={"help": "Define the model name"},
     )
     mode: Optional[Mode] = field(
@@ -1130,7 +1130,7 @@ def build(
     def _get_processing_unit(self):
         """Detects if the resource requirements are intended for a CPU or GPU instance."""
         # Assume custom orchestrator will be deployed as an endpoint to a CPU instance
-        if not self.resource_requirements:
+        if not self.resource_requirements or not self.resource_requirements.num_accelerators:
             return "cpu"
         for ic in self.modelbuilder_list or []:
             if ic.resource_requirements.num_accelerators > 0:
@@ -1171,10 +1171,10 @@ def _get_ic_resource_requirements(self, mb: ModelBuilder = None) -> ModelBuilder

     @_capture_telemetry("build_custom_orchestrator")
     def _get_smd_image_uri(self, processing_unit: str = None) -> str:
-        """Gets the SMD Inference URI.
+        """Gets the SMD Inference Image URI.

         Returns:
-            str: Pytorch DLC URI.
+            str: SMD Inference Image URI.
         """
         from sagemaker import image_uris
         import sys
@@ -1183,10 +1183,10 @@ def _get_smd_image_uri(self, processing_unit: str = None) -> str:
         from packaging.version import Version

         formatted_py_version = f"py{sys.version_info.major}{sys.version_info.minor}"
-        if Version(f"{sys.version_info.major}.{sys.version_info.minor}") < Version("3.11.11"):
+        if Version(f"{sys.version_info.major}.{sys.version_info.minor}") < Version("3.12"):
             raise ValueError(
                 f"Found Python version {formatted_py_version} but"
-                f"Custom orchestrator deployment requires Python version >= 3.11.11."
+                f"Custom orchestrator deployment requires Python version >= 3.12."
             )

         INSTANCE_TYPES = {"cpu": "ml.c5.xlarge", "gpu": "ml.g5.4xlarge"}
@@ -1956,7 +1956,7 @@ def deploy(
             ]
         ] = None,
         custom_orchestrator_instance_type: str = None,
-        custom_orchestrator_initial_instance_count: int = 1,
+        custom_orchestrator_initial_instance_count: int = None,
         **kwargs,
     ) -> Union[Predictor, Transformer, List[Predictor]]:
         """Deploys the built Model.
@@ -1977,7 +1977,7 @@ def deploy(
         """
         if not hasattr(self, "built_model") and not hasattr(self, "_deployables"):
            raise ValueError("Model needs to be built before deploying")
-        endpoint_name = unique_name_from_base("endpoint-name")
+        endpoint_name = endpoint_name or unique_name_from_base("endpoint-name")

         if not hasattr(self, "_deployables"):
             if not inference_config:  # Real-time Deployment
@@ -2038,13 +2038,14 @@ def deploy(
             )
         if self._deployables.get("CustomOrchestrator", None):
             custom_orchestrator = self._deployables.get("CustomOrchestrator")
+            if not custom_orchestrator_instance_type and not instance_type:
+                logger.warning(
+                    "Deploying custom orchestrator as an endpoint but no instance type was "
+                    "set. Defaulting to `ml.c5.xlarge`."
+                )
+                custom_orchestrator_instance_type = "ml.c5.xlarge"
+                custom_orchestrator_initial_instance_count = 1
             if custom_orchestrator["Mode"] == "Endpoint":
-                if not custom_orchestrator_instance_type:
-                    logger.warning(
-                        "Deploying custom orchestrator as an endpoint but no instance type was "
-                        "set. Defaulting to `ml.c5.xlarge`."
-                    )
-                    custom_orchestrator_instance_type = "ml.c5.xlarge"
                 logger.info(
                     "Deploying custom orchestrator on instance type %s.",
                     custom_orchestrator_instance_type,
@@ -2057,13 +2058,18 @@ def deploy(
2057
2058
)
2058
2059
)
2059
2060
elif custom_orchestrator ["Mode" ] == "InferenceComponent" :
2061
+ logger .info (
2062
+ "Deploying custom orchestrator as an inference component "
2063
+ f"to endpoint { endpoint_name } "
2064
+ )
2060
2065
predictors .append (
2061
2066
self ._deploy_for_ic (
2062
2067
ic_data = custom_orchestrator ,
2063
2068
container_timeout_in_seconds = container_timeout_in_second ,
2064
2069
instance_type = custom_orchestrator_instance_type or instance_type ,
2065
2070
initial_instance_count = custom_orchestrator_initial_instance_count
2066
2071
or initial_instance_count ,
2072
+ endpoint_name = endpoint_name ,
2067
2073
** kwargs ,
2068
2074
)
2069
2075
)
0 commit comments