Add basic loadgen timer implementation

sjmonson · sjmonson · commit b599480b1d3d · 2025-02-03T14:12:07.000-05:00
diff --git a/inference_perf/loadgen/load_generator.py b/inference_perf/loadgen/load_generator.py
@@ -11,3 +11,66 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import time
+from abc import ABC
+from typing import Generator, Optional
+import numpy as np
+
+
+class LoadTimer(ABC):
+    """Abstract base class for load generators."""
+    def __init__(self, *args) -> None:
+        # TODO: Common functionality
+        pass
+
+    def start_timer(self, initial: Optional[float] = None) -> Generator[float, None, None]:
+        """Yield the times at which requests should be made."""
+        raise NotImplementedError
+
+
+class ConstantLoadTimer(LoadTimer):
+    """
+    A load generator that generates requests at a constant average rate.
+    Inter-request gaps are drawn from an exponential distribution with mean 1/rate.
+    """
+    def __init__(self, rate: float) -> None:
+        self._rate = rate
+        # TODO: Make random state a global seed
+        self._rand = np.random.default_rng()
+
+    def start_timer(self, initial: Optional[float] = None) -> Generator[float, None, None]:
+        # Set start time
+        next_time = time.monotonic() if initial is None else initial
+
+        # Given a rate, yield the absolute time at which the next request is due
+        while True:
+            next_time += self._rand.exponential(1 / self._rate)
+            yield next_time
+
+
+class PoissonLoadTimer(LoadTimer):
+    def __init__(self, rate: float) -> None:
+        self._rate = rate
+        self._rand = np.random.default_rng()
+
+    def start_timer(self, initial: Optional[float] = None) -> Generator[float, None, None]:
+        # Set start time
+        next_time = time.monotonic() if initial is None else initial
+
+        # Given a rate, yield the absolute time at which the next request is due
+        while True:
+            # How many requests in the next second
+            req_count = self._rand.poisson(self._rate)
+
+            # If no requests, yield a time 1 second ahead (NOTE(review): next_time itself is not advanced)
+            if req_count < 1:
+                yield next_time + 1.0
+                continue
+
+            # Schedule the requests over the next second
+            timer = ConstantLoadTimer(req_count)
+            times = timer.start_timer(next_time)
+            for _ in range(req_count):
+                next_time = next(times)
+                yield next_time
diff --git a/pdm.lock b/pdm.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,6 +5,7 @@ description = "A GenAI inference performance benchmarking tool."
 authors = []
 dependencies = [
     "aiohttp>=3.11.11",
+    "numpy>=2.2.2",
 ]
 requires-python = ">=3.12"
 readme = "README.md"

Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,7 @@ description = "A GenAI inference performance benchmarking tool."`
`5`	`5`	`authors = []`
`6`	`6`	`dependencies = [`
`7`	`7`	`"aiohttp>=3.11.11",`
	`8`	`+ "numpy>=2.2.2",`
`8`	`9`	`]`
`9`	`10`	`requires-python = ">=3.12"`
`10`	`11`	`readme = "README.md"`