@@ -84,7 +84,7 @@ def main():
84
84
'tol_lat_time_lo' : [], 'tol_lat_time_hi' : [],
85
85
'avg_prefill_queue_size' : [],
86
86
'avg_pending_tokens_perc' : [],
87
- 'avg_actual_tokens_perc' : [], 'request_count' : []},
87
+ 'avg_actual_tokens_perc' : [], 'request_count' : [], 'request_count_lo' : [], 'request_count_hi' : [] },
88
88
89
89
'smart' : {'latency' : [], 'latency_lo' : [], 'latency_hi' : [],
90
90
'estimated_latency' : [], 'estimated_latency_lo' : [], 'estimated_latency_hi' : [],
@@ -100,7 +100,7 @@ def main():
100
100
'tol_lat_time_lo' : [], 'tol_lat_time_hi' : [],
101
101
'avg_prefill_queue_size' : [],
102
102
'avg_pending_tokens_perc' : [],
103
- 'avg_actual_tokens_perc' : [], 'request_count' : []},
103
+ 'avg_actual_tokens_perc' : [], 'request_count' : [], 'request_count_lo' : [], 'request_count_hi' : []},
104
104
105
105
'leastlatency' : {'latency' : [], 'latency_lo' : [], 'latency_hi' : [],
106
106
'throughput_prefill' : [], 'throughput_decode' : [],
@@ -114,7 +114,7 @@ def main():
114
114
'tol_lat_time_lo' : [], 'tol_lat_time_hi' : [],
115
115
'avg_prefill_queue_size' : [],
116
116
'avg_pending_tokens_perc' : [],
117
- 'avg_actual_tokens_perc' : [], 'request_count' : []},
117
+ 'avg_actual_tokens_perc' : [], 'request_count' : [], 'request_count_lo' : [], 'request_count_hi' : []},
118
118
'least' : {'latency' : [], 'latency_lo' : [], 'latency_hi' : [],
119
119
'throughput_prefill' : [], 'throughput_decode' : [],
120
120
'throughput_prefill_lo' : [], 'throughput_decode_lo' : [],
@@ -127,7 +127,7 @@ def main():
127
127
'tol_lat_time_lo' : [], 'tol_lat_time_hi' : [],
128
128
'avg_prefill_queue_size' : [],
129
129
'avg_pending_tokens_perc' : [],
130
- 'avg_actual_tokens_perc' : [], 'request_count' : []},
130
+ 'avg_actual_tokens_perc' : [], 'request_count' : [], 'request_count_lo' : [], 'request_count_hi' : []},
131
131
'random' : {'latency' : [], 'latency_lo' : [], 'latency_hi' : [],
132
132
'throughput_prefill' : [], 'throughput_decode' : [],
133
133
'throughput_prefill_lo' : [], 'throughput_decode_lo' : [],
@@ -140,21 +140,21 @@ def main():
140
140
'tol_lat_time_lo' : [], 'tol_lat_time_hi' : [],
141
141
'avg_prefill_queue_size' : [],
142
142
'avg_pending_tokens_perc' : [],
143
- 'avg_actual_tokens_perc' : [], 'request_count' : []},
143
+ 'avg_actual_tokens_perc' : [], 'request_count' : [], 'request_count_lo' : [], 'request_count_hi' : []},
144
144
}
145
145
146
146
all_routing_types = [ routing_type ]
147
147
prompt_output_tuple = None
148
148
149
149
# Iterate over routing types
150
150
for routing_type in all_routing_types :
151
- print (f'Routing Type: { routing_type } ' )
151
+ # print(f'Routing Type: {routing_type}')
152
152
153
153
for i , _ in enumerate (rates_lo ):
154
154
req_dict = {}
155
155
req_dict_prefill = {}
156
156
SIM_DURATION = SIM_DURATIONS [i ]
157
- print (f'Simulate with rate: for lo { rates_lo [i ]} and for hi { rates_hi [i ]} and routing type: { routing_type } ' )
157
+ # print(f'Simulate with rate: for lo {rates_lo[i]} and for hi {rates_hi[i]} and routing type: {routing_type}')
158
158
sys .stdout .flush ()
159
159
# Simpy environment and LLM actors setup
160
160
env = simpy .Environment ()
@@ -292,28 +292,29 @@ def main():
292
292
l1 = [np .sum (list (dict (x ).values ())) for x in results [routing_type ]['target_pods_lo' ]][- 1 ]
293
293
l2 = [np .sum (list (dict (x ).values ())) for x in results [routing_type ]['target_pods_hi' ]][- 1 ]
294
294
295
- print (f'req count { (l1 , l2 )} ' )
295
+ # print(f'req count {(l1, l2)}')
296
296
sys .stdout .flush ()
297
- results [routing_type ]['request_count' ].append (len (completed_req ))
297
+ results [routing_type ]['request_count' ].append (len (filtered_req ))
298
+ results [routing_type ]['request_count_lo' ].append (len (filtered_req_lo ))
299
+ results [routing_type ]['request_count_hi' ].append (len (filtered_req_hi ))
298
300
299
301
if routing_type == 'smart' :
300
302
results [routing_type ]['estimated_latency' ].append (estimated_latency_cur )
301
303
results [routing_type ]['estimated_latency_lo' ].append (estimated_latency_cur_lo )
302
304
results [routing_type ]['estimated_latency_hi' ].append (estimated_latency_cur_hi )
303
- print (f"lo dist { Counter (target_pods_lo )} latency { latency_cur_lo } estimated_latency_lo { estimated_latency_cur_lo } " )
304
- print (f"hi dist { Counter (target_pods_hi )} latency { latency_cur_hi } estimated_latency_hi { estimated_latency_cur_hi } " )
305
- else :
306
- print (f"lo dist { Counter (target_pods_lo )} latency { latency_cur_lo } " )
307
- print (f"hi dist { Counter (target_pods_hi )} latency { latency_cur_hi } " )
308
-
309
- # Print the results for this qps
310
- print (f'QPS: { rates_lo [i ]} (lo), { rates_hi [i ]} (hi)' )
311
- print (f'% of lo requests below target: { pct_below_target_lo } %' )
312
- print (f'% of hi requests below target: { pct_below_target_hi } %' )
313
- print (f"prefill_queue_size { np .mean (prefill_queue_size )} " )
314
- print (f"pending_tokens_perc { np .mean (pending_tokens_at_arrival_perc )} " )
315
- print (f"actual_tokens_perc { np .mean (actual_tokens_at_arrival_perc )} " )
316
- sys .stdout .flush ()
305
+ #print(f"lo dist {Counter(target_pods_lo)} latency {latency_cur_lo} estimated_latency_lo {estimated_latency_cur_lo}")
306
+ #print(f"hi dist {Counter(target_pods_hi)} latency {latency_cur_hi} estimated_latency_hi {estimated_latency_cur_hi}")
307
+ #else:
308
+ #print(f"lo dist {Counter(target_pods_lo)} latency {latency_cur_lo} ")
309
+ #print(f"hi dist {Counter(target_pods_hi)} latency {latency_cur_hi} ")
310
+
311
+ # #print the results for this qps
312
+ #print(f'QPS: {rates_lo[i]} (lo), {rates_hi[i]} (hi)')
313
+ #print(f'% of lo requests below target: {pct_below_target_lo}%')
314
+ #print(f'% of hi requests below target: {pct_below_target_hi}%')
315
+ #print(f"prefill_queue_size {np.mean(prefill_queue_size)}")
316
+ #print(f"pending_tokens_perc {np.mean(pending_tokens_at_arrival_perc)}")
317
+ #print(f"actual_tokens_perc {np.mean(actual_tokens_at_arrival_perc)}")
317
318
318
319
319
320
@@ -331,8 +332,15 @@ def main():
331
332
if not os .path .exists (output_dir ):
332
333
os .makedirs (output_dir )
333
334
335
+ # Write results to CSV
336
+ # Ensure the output directory exists
337
+ output_dir = os .path .dirname (output_file )
338
+ if not os .path .exists (output_dir ):
339
+ os .makedirs (output_dir )
340
+
341
+ # Open the CSV file for writing
334
342
with open (output_file , 'w' , newline = '' ) as csvfile :
335
- fieldnames = ['RoutingType' , 'RateIndex' , 'Latency' , 'Latency_Lo' , 'Latency_Hi' , 'avg_prefill_queue_size' , 'avg_pending_tokens_perc' , 'avg_actual_tokens_perc' , 'pct_below_latency_target_lo' , 'pct_below_latency_target_hi' ]
343
+ fieldnames = ['Job' , 'RoutingType' , 'RateIndex' , 'Latency' , 'Latency_Lo' , 'Latency_Hi' , 'avg_prefill_queue_size' , 'avg_pending_tokens_perc' , 'avg_actual_tokens_perc' , 'pct_below_latency_target_lo' , 'pct_below_latency_target_hi' , 'num_req_lo' , 'num_req_hi' ]
336
344
writer = csv .DictWriter (csvfile , fieldnames = fieldnames )
337
345
338
346
writer .writeheader ()
@@ -341,6 +349,7 @@ def main():
341
349
for routing_type in all_routing_types :
342
350
for i in range (len (rates_lo )):
343
351
writer .writerow ({
352
+ 'Job' : os .path .basename (output_file ),
344
353
'RoutingType' : routing_type ,
345
354
'RateIndex' : rates_lo [i ],
346
355
'Latency' : results [routing_type ]['latency' ][i ],
@@ -349,11 +358,17 @@ def main():
349
358
'avg_prefill_queue_size' : results [routing_type ]['avg_prefill_queue_size' ][i ],
350
359
'avg_pending_tokens_perc' : results [routing_type ]['avg_pending_tokens_perc' ][i ],
351
360
'avg_actual_tokens_perc' : results [routing_type ]['avg_actual_tokens_perc' ][i ],
352
- 'pct_below_latency_target_lo' : results [routing_type ]['pct_below_latency_target_lo' ][i ],
353
- 'pct_below_latency_target_hi' : results [routing_type ]['pct_below_latency_target_hi' ][i ],
361
+ 'pct_below_latency_target_lo' : results [routing_type ]['pct_below_latency_target_lo' ][i ]* results [routing_type ]['request_count_lo' ][i ]/ no_of_messages ,
362
+ 'pct_below_latency_target_hi' : results [routing_type ]['pct_below_latency_target_hi' ][i ]* results [routing_type ]['request_count_hi' ][i ]/ no_of_messages ,
363
+ 'num_req_lo' : results [routing_type ]['request_count_lo' ][i ],
364
+ 'num_req_hi' : results [routing_type ]['request_count_hi' ][i ]
354
365
})
355
366
356
- print (f"Results have been saved to { output_file } " )
367
+ # Print the CSV file to stdout
368
+ with open (output_file , 'r' ) as csvfile :
369
+ sys .stdout .write (csvfile .read ())
370
+
371
+ #print(f"Results have been saved to {output_file}")
357
372
358
373
359
374
0 commit comments