diff --git a/inference_perf/client/README.md b/inference_perf/client/README.md new file mode 100644 index 0000000..ff973c7 --- /dev/null +++ b/inference_perf/client/README.md @@ -0,0 +1,3 @@ +# Custom Clients + +All custom clients are organized in this directory, including model server and metrics clients. The directory structure is organized to reflect relationships/commonalities between clients. \ No newline at end of file diff --git a/inference_perf/client/model_servers/README.md b/inference_perf/client/model_servers/README.md new file mode 100644 index 0000000..6ff6fa2 --- /dev/null +++ b/inference_perf/client/model_servers/README.md @@ -0,0 +1,14 @@ +# Model Server Clients + +Common functionality appears between model servers with similar input and output types. These model servers are organized accordingly. + +Todo: +- **Text to Text**: + - Naive_transformers + - tensorrt_llm_triton + - sax + - tgi + - vllm + - jetstream +- **Text to Image**: + - Maxdiffusion \ No newline at end of file diff --git a/inference_perf/client/model_servers/__init__.py b/inference_perf/client/model_servers/__init__.py new file mode 100644 index 0000000..81be708 --- /dev/null +++ b/inference_perf/client/model_servers/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Path of this module in the patch: inference_perf/client/model_servers/client.py
from abc import ABC, abstractmethod
from typing import Any, List
import asyncio
import aiohttp

# Names of the ErrorsReport counters, listed in the order they are initialised.
_COUNTER_FIELDS = (
    "ClientConnectorErrors",
    "TimeoutErrors",
    "ContentTypeErrors",
    "ClientOSErrors",
    "ServerDisconnectedErrors",
    "unknown_errors",
)


class ErrorsReport:
    """Per-category tally of the exceptions handed to ``record_error``."""

    ClientConnectorErrors: int
    TimeoutErrors: int
    ContentTypeErrors: int
    ClientOSErrors: int
    ServerDisconnectedErrors: int
    unknown_errors: int

    def __init__(self) -> None:
        """Start every counter at zero."""
        for field_name in _COUNTER_FIELDS:
            setattr(self, field_name, 0)

    def to_dict(self) -> dict[str, int]:
        """Return the integer-valued instance attributes as a name -> count dict."""
        counts: dict[str, int] = {}
        for attr_name, attr_value in vars(self).items():
            if isinstance(attr_value, int):
                counts[attr_name] = attr_value
        return counts

    def record_error(self, error: Exception) -> None:
        """
        Increment the counter matching the type of ``error`` and print the error.

        The first matching ``isinstance`` test wins; anything that matches none
        of the known types is counted under ``unknown_errors``.
        """
        aio_errors = aiohttp.client_exceptions

        # NOTE(review): aiohttp documents ClientConnectorError as a subclass of
        # ClientOSError, so it has to be tested before the ClientOSError branch
        # - confirm against the aiohttp version in use.
        if isinstance(error, aio_errors.ClientConnectorError):
            self.ClientConnectorErrors += 1
            print(f"ClientConnectorError: {error}")
            return

        if isinstance(error, asyncio.TimeoutError):
            self.TimeoutErrors += 1
            print(f"TimeoutError: {error}")
            return

        if isinstance(error, aio_errors.ContentTypeError):
            self.ContentTypeErrors += 1
            print(f"ContentTypeError: {error}")
            return

        if isinstance(error, aio_errors.ClientOSError):
            self.ClientOSErrors += 1
            print(f"ClientOSError: {error}")
            return

        if isinstance(error, aio_errors.ServerDisconnectedError):
            self.ServerDisconnectedErrors += 1
            print(f"ServerDisconnectedError: {error}")
            return

        self.unknown_errors += 1
        print(f"Unknown error: {error}")

    def append_report(self, report: "ErrorsReport") -> None:
        """Add every counter of ``report`` onto the matching counter of this report."""
        for field_name in _COUNTER_FIELDS:
            combined = getattr(self, field_name) + getattr(report, field_name)
            setattr(self, field_name, combined)


class Model_Server_Client(ABC):
    """Abstract interface for model server clients."""

    # The client will collect a summary of all observed errors.
    # Only declared here; nothing in this class assigns it.
    Errors: ErrorsReport

    @abstractmethod
    def summary(self) -> Any:
        """
        Return summary data derived from all inputs and outputs.

        The result depends on the client's input and output data types, so
        subclasses should implement this at the client data type level
        (e.g., text-to-text, text-to-image).
        """

    @abstractmethod
    def request(self, *args: Any, **kwargs: Any) -> Any:
        """
        Make a request to a model server; this is the method loadgen should use.
        """

    @abstractmethod
    def list_model_server_metrics(self) -> list[str]:
        """
        Return the list of model server metrics of interest.
        """