Skip to content

Commit 2bafd2a

Browse files
Add the 'env' parameter to creating/updating Inference Endpoints (#3045)
* Add the 'env' parameter to creating/updating Inference Endpoints * fix example --------- Co-authored-by: Celina Hanouti <[email protected]>
1 parent fff83af commit 2bafd2a

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

src/huggingface_hub/hf_api.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7573,6 +7573,7 @@ def create_inference_endpoint(
75737573
revision: Optional[str] = None,
75747574
task: Optional[str] = None,
75757575
custom_image: Optional[Dict] = None,
7576+
env: Optional[Dict[str, str]] = None,
75767577
secrets: Optional[Dict[str, str]] = None,
75777578
type: InferenceEndpointType = InferenceEndpointType.PROTECTED,
75787579
domain: Optional[str] = None,
@@ -7616,6 +7617,8 @@ def create_inference_endpoint(
76167617
custom_image (`Dict`, *optional*):
76177618
A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
76187619
Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
7620+
env (`Dict[str, str]`, *optional*):
7621+
Non-secret environment variables to inject in the container environment.
76197622
secrets (`Dict[str, str]`, *optional*):
76207623
Secret values to inject in the container environment.
76217624
type ([`InferenceEndpointType`], *optional*):
@@ -7678,14 +7681,14 @@ def create_inference_endpoint(
76787681
... type="protected",
76797682
... instance_size="x1",
76807683
... instance_type="nvidia-a10g",
7684+
... env={
7685+
... "MAX_BATCH_PREFILL_TOKENS": "2048",
7686+
... "MAX_INPUT_LENGTH": "1024",
7687+
... "MAX_TOTAL_TOKENS": "1512",
7688+
... "MODEL_ID": "/repository"
7689+
... },
76817690
... custom_image={
76827691
... "health_route": "/health",
7683-
... "env": {
7684-
... "MAX_BATCH_PREFILL_TOKENS": "2048",
7685-
... "MAX_INPUT_LENGTH": "1024",
7686-
... "MAX_TOTAL_TOKENS": "1512",
7687-
... "MODEL_ID": "/repository"
7688-
... },
76897692
... "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
76907693
... },
76917694
... secrets={"MY_SECRET_KEY": "secret_value"},
@@ -7723,6 +7726,8 @@ def create_inference_endpoint(
77237726
},
77247727
"type": type,
77257728
}
7729+
if env:
7730+
payload["model"]["env"] = env
77267731
if secrets:
77277732
payload["model"]["secrets"] = secrets
77287733
if domain is not None or path is not None:
@@ -7897,6 +7902,7 @@ def update_inference_endpoint(
78977902
revision: Optional[str] = None,
78987903
task: Optional[str] = None,
78997904
custom_image: Optional[Dict] = None,
7905+
env: Optional[Dict[str, str]] = None,
79007906
secrets: Optional[Dict[str, str]] = None,
79017907
# Route update
79027908
domain: Optional[str] = None,
@@ -7942,6 +7948,8 @@ def update_inference_endpoint(
79427948
custom_image (`Dict`, *optional*):
79437949
A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
79447950
Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
7951+
env (`Dict[str, str]`, *optional*):
7952+
Non-secret environment variables to inject in the container environment.
79457953
secrets (`Dict[str, str]`, *optional*):
79467954
Secret values to inject in the container environment.
79477955
@@ -7992,6 +8000,8 @@ def update_inference_endpoint(
79928000
payload["model"]["task"] = task
79938001
if custom_image is not None:
79948002
payload["model"]["image"] = {"custom": custom_image}
8003+
if env is not None:
8004+
payload["model"]["env"] = env
79958005
if secrets is not None:
79968006
payload["model"]["secrets"] = secrets
79978007
if domain is not None:

0 commit comments

Comments
 (0)