Skip to content

Commit fca7a71

Browse files
committed
Minor updates
1 parent e045668 commit fca7a71

File tree

1 file changed

+33
-33
lines changed

1 file changed

+33
-33
lines changed

src/utils/metrics.py

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def __init__(self, labels: List[str], max_model_len: int):
93 93
self.counter_generation_tokens = self.counter_generation_tokens_family.Metric(
94 94
labels=labels
95 95
)
96-
# Use the same bucket boundaries from vLLM sample metrics.
96+
# Use the same bucket boundaries from vLLM sample metrics as an example.
97 97
# https://github.com/vllm-project/vllm/blob/21313e09e3f9448817016290da20d0db1adf3664/vllm/engine/metrics.py#L81-L96
98 98
self.histogram_time_to_first_token = (
99 99
self.histogram_time_to_first_token_family.Metric(
@@ -214,35 +214,35 @@ def log(self, stats: VllmStats) -> None:
214 214
Returns:
215 215
None
216 216
"""
217-
# Iteration stats
218-
self._log_counter(
219-
self.metrics.counter_prompt_tokens, stats.num_prompt_tokens_iter
220-
)
221-
self._log_counter(
222-
self.metrics.counter_generation_tokens, stats.num_generation_tokens_iter
223-
)
224-
self._log_histogram(
225-
self.metrics.histogram_time_to_first_token, stats.time_to_first_tokens_iter
226-
)
227-
self._log_histogram(
228-
self.metrics.histogram_time_per_output_token,
229-
stats.time_per_output_tokens_iter,
230-
)
231-
# Request stats
232-
# Latency
233-
self._log_histogram(
234-
self.metrics.histogram_e2e_time_request, stats.time_e2e_requests
235-
)
236-
# Metadata
237-
self._log_histogram(
238-
self.metrics.histogram_num_prompt_tokens_request,
239-
stats.num_prompt_tokens_requests,
240-
)
241-
self._log_histogram(
242-
self.metrics.histogram_num_generation_tokens_request,
243-
stats.num_generation_tokens_requests,
244-
)
245-
self._log_histogram(
246-
self.metrics.histogram_best_of_request, stats.best_of_requests
247-
)
248-
self._log_histogram(self.metrics.histogram_n_request, stats.n_requests)
217+
# List of reported metrics can be found in this doc.
218+
# https://github.com/triton-inference-server/vllm_backend?tab=readme-ov-file#triton-metrics
219+
counter_metrics = [
220+
(self.metrics.counter_prompt_tokens, stats.num_prompt_tokens_iter),
221+
(self.metrics.counter_generation_tokens, stats.num_generation_tokens_iter),
222+
]
223+
histogram_metrics = [
224+
(
225+
self.metrics.histogram_time_to_first_token,
226+
stats.time_to_first_tokens_iter,
227+
),
228+
(
229+
self.metrics.histogram_time_per_output_token,
230+
stats.time_per_output_tokens_iter,
231+
),
232+
(self.metrics.histogram_e2e_time_request, stats.time_e2e_requests),
233+
(
234+
self.metrics.histogram_num_prompt_tokens_request,
235+
stats.num_prompt_tokens_requests,
236+
),
237+
(
238+
self.metrics.histogram_num_generation_tokens_request,
239+
stats.num_generation_tokens_requests,
240+
),
241+
(self.metrics.histogram_best_of_request, stats.best_of_requests),
242+
(self.metrics.histogram_n_request, stats.n_requests),
243+
]
244+
245+
for metric, data in counter_metrics:
246+
self._log_counter(metric, data)
247+
for metric, data in histogram_metrics:
248+
self._log_histogram(metric, data)

0 commit comments

Comments
 (0)