Skip to content

Commit 1e40c62

Browse files
[Inference Providers] fix inference with URL endpoints (#3041)
* fix inference with url endpoints * style * parentheses * add test * Update tests/test_inference_client.py Co-authored-by: Lucain <[email protected]> * Update tests/test_inference_client.py Co-authored-by: Lucain <[email protected]> * Update tests/test_inference_client.py --------- Co-authored-by: Lucain <[email protected]>
1 parent caeaeeb commit 1e40c62

File tree

4 files changed

+93
-3
lines changed

4 files changed

+93
-3
lines changed

src/huggingface_hub/inference/_client.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -883,7 +883,13 @@ def chat_completion(
         payload_model = model or self.model

         # Get the provider helper
-        provider_helper = get_provider_helper(self.provider, task="conversational", model=payload_model)
+        provider_helper = get_provider_helper(
+            self.provider,
+            task="conversational",
+            model=model_id_or_url
+            if model_id_or_url is not None and model_id_or_url.startswith(("http://", "https://"))
+            else payload_model,
+        )

         # Prepare the payload
         parameters = {

src/huggingface_hub/inference/_generated/_async_client.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -923,7 +923,13 @@ async def chat_completion(
         payload_model = model or self.model

         # Get the provider helper
-        provider_helper = get_provider_helper(self.provider, task="conversational", model=payload_model)
+        provider_helper = get_provider_helper(
+            self.provider,
+            task="conversational",
+            model=model_id_or_url
+            if model_id_or_url is not None and model_id_or_url.startswith(("http://", "https://"))
+            else payload_model,
+        )

         # Prepare the payload
         parameters = {

src/huggingface_hub/inference/_providers/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,9 @@ def get_provider_helper(
         ValueError: If provider or task is not supported
     """

-    if model is None and provider in (None, "auto"):
+    if (model is None and provider in (None, "auto")) or (
+        model is not None and model.startswith(("http://", "https://"))
+    ):
         provider = "hf-inference"

     if provider is None:

tests/test_inference_client.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,3 +1087,79 @@ def test_warning_if_bill_to_with_direct_calls(self):
             match="You've provided an external provider's API key, so requests will be billed directly by the provider.",
         ):
             InferenceClient(bill_to="openai", token="replicate_key", provider="replicate")
+
+
+@pytest.mark.parametrize(
+    "client_init_arg, init_kwarg_name, expected_request_url, expected_payload_model",
+    [
+        # passing a custom endpoint in the model argument
+        pytest.param(
+            "https://my-custom-endpoint.com/custom_path",
+            "model",
+            "https://my-custom-endpoint.com/custom_path/v1/chat/completions",
+            "dummy",
+            id="client_model_is_url",
+        ),
+        # passing a custom endpoint in the base_url argument
+        pytest.param(
+            "https://another-endpoint.com/v1/",
+            "base_url",
+            "https://another-endpoint.com/v1/chat/completions",
+            "dummy",
+            id="client_base_url_is_url",
+        ),
+        # passing a model ID
+        pytest.param(
+            "username/repo_name",
+            "model",
+            "https://router.huggingface.co/hf-inference/models/username/repo_name/v1/chat/completions",
+            "username/repo_name",
+            id="client_model_is_id",
+        ),
+        # passing a custom endpoint in the model argument
+        pytest.param(
+            "https://specific-chat-endpoint.com/v1/chat/completions",
+            "model",
+            "https://specific-chat-endpoint.com/v1/chat/completions",
+            "dummy",
+            id="client_model_is_full_chat_url",
+        ),
+        # passing a localhost URL in the model argument
+        pytest.param(
+            "http://localhost:8080",
+            "model",
+            "http://localhost:8080/v1/chat/completions",
+            "dummy",
+            id="client_model_is_localhost_url",
+        ),
+        # passing a localhost URL in the base_url argument
+        pytest.param(
+            "http://127.0.0.1:8000/custom/path/v1",
+            "base_url",
+            "http://127.0.0.1:8000/custom/path/v1/chat/completions",
+            "dummy",
+            id="client_base_url_is_localhost_ip_with_path",
+        ),
+    ],
+)
+def test_chat_completion_url_resolution(
+    mocker, client_init_arg, init_kwarg_name, expected_request_url, expected_payload_model
+):
+    init_kwargs = {init_kwarg_name: client_init_arg, "provider": "hf-inference"}
+    client = InferenceClient(**init_kwargs)
+
+    mock_response_content = b'{"choices": [{"message": {"content": "Mock response"}}]}'
+    mocker.patch(
+        "huggingface_hub.inference._providers.hf_inference._check_supported_task",
+        return_value=None,
+    )
+
+    with patch.object(InferenceClient, "_inner_post", return_value=mock_response_content) as mock_inner_post:
+        client.chat_completion(messages=[{"role": "user", "content": "Hello?"}], stream=False)
+
+    mock_inner_post.assert_called_once()
+
+    request_params = mock_inner_post.call_args[0][0]
+    assert request_params.url == expected_request_url
+    assert request_params.json is not None
+    assert request_params.json.get("model") == expected_payload_model

0 commit comments

Comments (0)