@@ -7573,6 +7573,7 @@ def create_inference_endpoint(
7573
7573
revision : Optional [str ] = None ,
7574
7574
task : Optional [str ] = None ,
7575
7575
custom_image : Optional [Dict ] = None ,
7576
+ env : Optional [Dict [str , str ]] = None ,
7576
7577
secrets : Optional [Dict [str , str ]] = None ,
7577
7578
type : InferenceEndpointType = InferenceEndpointType .PROTECTED ,
7578
7579
domain : Optional [str ] = None ,
@@ -7616,6 +7617,8 @@ def create_inference_endpoint(
7616
7617
custom_image (`Dict`, *optional*):
7617
7618
A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
7618
7619
Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
7620
+ env (`Dict[str, str]`, *optional*):
7621
+ Non-secret environment variables to inject in the container environment.
7619
7622
secrets (`Dict[str, str]`, *optional*):
7620
7623
Secret values to inject in the container environment.
7621
7624
type ([`InferenceEndpointType`], *optional*):
@@ -7678,14 +7681,14 @@ def create_inference_endpoint(
7678
7681
... type="protected",
7679
7682
... instance_size="x1",
7680
7683
... instance_type="nvidia-a10g",
7684
+ ... env={
7685
+ ... "MAX_BATCH_PREFILL_TOKENS": "2048",
7686
+ ... "MAX_INPUT_LENGTH": "1024",
7687
+ ... "MAX_TOTAL_TOKENS": "1512",
7688
+ ... "MODEL_ID": "/repository"
7689
+ ... },
7681
7690
... custom_image={
7682
7691
... "health_route": "/health",
7683
- ... "env": {
7684
- ... "MAX_BATCH_PREFILL_TOKENS": "2048",
7685
- ... "MAX_INPUT_LENGTH": "1024",
7686
- ... "MAX_TOTAL_TOKENS": "1512",
7687
- ... "MODEL_ID": "/repository"
7688
- ... },
7689
7692
... "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
7690
7693
... },
7691
7694
... secrets={"MY_SECRET_KEY": "secret_value"},
@@ -7723,6 +7726,8 @@ def create_inference_endpoint(
7723
7726
},
7724
7727
"type" : type ,
7725
7728
}
7729
+ if env :
7730
+ payload ["model" ]["env" ] = env
7726
7731
if secrets :
7727
7732
payload ["model" ]["secrets" ] = secrets
7728
7733
if domain is not None or path is not None :
@@ -7897,6 +7902,7 @@ def update_inference_endpoint(
7897
7902
revision : Optional [str ] = None ,
7898
7903
task : Optional [str ] = None ,
7899
7904
custom_image : Optional [Dict ] = None ,
7905
+ env : Optional [Dict [str , str ]] = None ,
7900
7906
secrets : Optional [Dict [str , str ]] = None ,
7901
7907
# Route update
7902
7908
domain : Optional [str ] = None ,
@@ -7942,6 +7948,8 @@ def update_inference_endpoint(
7942
7948
custom_image (`Dict`, *optional*):
7943
7949
A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
7944
7950
Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
7951
+ env (`Dict[str, str]`, *optional*):
7952
+ Non-secret environment variables to inject in the container environment.
7945
7953
secrets (`Dict[str, str]`, *optional*):
7946
7954
Secret values to inject in the container environment.
7947
7955
@@ -7992,6 +8000,8 @@ def update_inference_endpoint(
7992
8000
payload ["model" ]["task" ] = task
7993
8001
if custom_image is not None :
7994
8002
payload ["model" ]["image" ] = {"custom" : custom_image }
8003
+ if env is not None :
8004
+ payload ["model" ]["env" ] = env
7995
8005
if secrets is not None :
7996
8006
payload ["model" ]["secrets" ] = secrets
7997
8007
if domain is not None :
0 commit comments