Skip to content

Commit b599480

Browse files
committed
Add basic loadgen timer implementation
1 parent eb3ce16 commit b599480

File tree

3 files changed

+105
-1
lines changed

3 files changed

+105
-1
lines changed

Diff for: inference_perf/loadgen/load_generator.py

+63
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,66 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
15+
import time
from abc import ABC, abstractmethod
from typing import Generator, Optional

import numpy as np
19+
20+
21+
class LoadTimer(ABC):
    """Abstract base class for load timers.

    A load timer is a generator factory: ``start_timer`` yields the
    timestamps at which requests should be made. Concrete subclasses must
    override ``start_timer``.
    """

    def __init__(self, *args) -> None:
        # TODO: Common functionality
        pass

    @abstractmethod
    def start_timer(self, initial: Optional[float] = None) -> Generator[float, None, None]:
        """Yield the times at which requests should be made.

        Args:
            initial: Optional starting timestamp. The timers in this module
                fall back to ``time.monotonic()`` when it is ``None``.

        Yields:
            The timestamp of each successive request.

        Raises:
            NotImplementedError: If a subclass delegates to this base
                implementation instead of providing its own schedule.
        """
        raise NotImplementedError
30+
31+
32+
class ConstantLoadTimer(LoadTimer):
    """
    A load timer that targets a fixed average request rate.

    The gap between consecutive request times is drawn from an exponential
    distribution whose mean is ``1 / rate``, so individual gaps are random
    while the long-run average matches ``rate``.
    """

    def __init__(self, rate: float) -> None:
        """Store the target rate and create this timer's random generator."""
        # TODO: Make random state a global seed
        self._rand = np.random.default_rng()
        self._rate = rate

    def start_timer(self, initial: Optional[float] = None) -> Generator[float, None, None]:
        """Yield an endless, increasing sequence of request timestamps.

        Args:
            initial: Timestamp to start from; ``time.monotonic()`` is used
                when it is ``None``.

        Yields:
            The timestamp of the next request.
        """
        # Pick the starting point of the schedule.
        if initial is None:
            clock = time.monotonic()
        else:
            clock = initial

        # Each iteration moves the clock forward by one random gap.
        while True:
            gap = self._rand.exponential(1 / self._rate)
            clock += gap
            yield clock
50+
51+
52+
class PoissonLoadTimer(LoadTimer):
    """A load timer whose request count per window is Poisson distributed.

    For every window the number of requests is sampled from a Poisson
    distribution with mean ``rate``. The sampled requests are then scheduled
    by a ``ConstantLoadTimer`` running at that sampled count, starting from
    the current position of the clock.
    """

    def __init__(self, rate: float) -> None:
        """Store the mean request rate and create the random generator.

        Args:
            rate: Mean number of requests per one-second window.

        Raises:
            ValueError: If ``rate`` is not a positive number. A rate of zero
                would never produce a request, so ``start_timer`` would loop
                forever without yielding.
        """
        # ``not rate > 0`` also rejects NaN, which ``rate <= 0`` would let through.
        if not rate > 0:
            raise ValueError(f"rate must be a positive number, got {rate!r}")
        self._rate = rate
        self._rand = np.random.default_rng()

    def start_timer(self, initial: Optional[float] = None) -> Generator[float, None, None]:
        """Yield an endless sequence of request timestamps.

        Args:
            initial: Timestamp to start from; ``time.monotonic()`` is used
                when it is ``None``.

        Yields:
            The timestamp of the next request.
        """
        # Set start time
        next_time = time.monotonic() if initial is None else initial

        while True:
            # How many requests in the next second
            req_count = self._rand.poisson(self._rate)

            # An empty window has no requests: advance the clock by one second
            # and yield nothing. Yielding here would create a request that
            # was never sampled, and leaving the clock untouched would repeat
            # the same timestamp on consecutive empty windows.
            if req_count < 1:
                next_time += 1.0
                continue

            # Schedule the sampled requests, continuing from the current
            # clock. The next window starts at the last yielded timestamp.
            timer = ConstantLoadTimer(req_count)
            times = timer.start_timer(next_time)
            for _ in range(req_count):
                next_time = next(times)
                yield next_time

Diff for: pdm.lock

+41-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ description = "A GenAI inference performance benchmarking tool."
55
authors = []
66
dependencies = [
77
"aiohttp>=3.11.11",
8+
"numpy>=2.2.2",
89
]
910
requires-python = ">=3.12"
1011
readme = "README.md"

0 commit comments

Comments
 (0)