@@ -93,7 +93,7 @@ def __init__(self, labels: List[str], max_model_len: int):
93
93
self .counter_generation_tokens = self .counter_generation_tokens_family .Metric (
94
94
labels = labels
95
95
)
96
- # Use the same bucket boundaries from vLLM sample metrics.
96
+ # Use the same bucket boundaries from vLLM sample metrics as an example .
97
97
# https://github.com/vllm-project/vllm/blob/21313e09e3f9448817016290da20d0db1adf3664/vllm/engine/metrics.py#L81-L96
98
98
self .histogram_time_to_first_token = (
99
99
self .histogram_time_to_first_token_family .Metric (
def log(self, stats: VllmStats) -> None:
    """Report collected vLLM engine statistics to the Triton metrics server.

    The full list of reported metrics is documented here:
    https://github.com/triton-inference-server/vllm_backend?tab=readme-ov-file#triton-metrics

    Args:
        stats: vLLM statistics gathered for the current reporting interval.
            NOTE(review): assumed — the Args section lies outside the visible
            snippet; confirm against the full docstring.

    Returns:
        None
    """
    # Counters are flushed before histograms to preserve the original
    # reporting order. Each entry pairs a Triton metric object with the
    # matching vLLM stat sample(s).
    for counter, sample in (
        (self.metrics.counter_prompt_tokens, stats.num_prompt_tokens_iter),
        (self.metrics.counter_generation_tokens, stats.num_generation_tokens_iter),
    ):
        self._log_counter(counter, sample)

    # Iteration-level latency histograms first, then request-level
    # latency and metadata histograms.
    for histogram, samples in (
        (self.metrics.histogram_time_to_first_token, stats.time_to_first_tokens_iter),
        (self.metrics.histogram_time_per_output_token, stats.time_per_output_tokens_iter),
        (self.metrics.histogram_e2e_time_request, stats.time_e2e_requests),
        (
            self.metrics.histogram_num_prompt_tokens_request,
            stats.num_prompt_tokens_requests,
        ),
        (
            self.metrics.histogram_num_generation_tokens_request,
            stats.num_generation_tokens_requests,
        ),
        (self.metrics.histogram_best_of_request, stats.best_of_requests),
        (self.metrics.histogram_n_request, stats.n_requests),
    ):
        self._log_histogram(histogram, samples)