Skip to content

Commit de7ff8f

Browse files
committed
Add histogram test
1 parent b1333ce commit de7ff8f

File tree

2 files changed

+85
-1
lines changed

2 files changed

+85
-1
lines changed

ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,17 @@ def test_vllm_metrics(self):
125125
# vllm:generation_tokens_total
126126
self.assertEqual(metrics_dict["vllm:generation_tokens_total"], 48)
127127

128+
# vllm:time_to_first_token_seconds
129+
self.assertEqual(metrics_dict["vllm:time_to_first_token_seconds_count"], 3)
130+
self.assertTrue(
131+
0 < metrics_dict["vllm:time_to_first_token_seconds_sum"] < 0.0005
132+
)
133+
# vllm:time_per_output_token_seconds
134+
self.assertEqual(metrics_dict["vllm:time_per_output_token_seconds_count"], 45)
135+
self.assertTrue(
136+
0 <= metrics_dict["vllm:time_per_output_token_seconds_sum"] <= 0.005
137+
)
138+
128139
def test_vllm_metrics_disabled(self):
129140
# Test vLLM metrics
130141
self.vllm_infer(

src/utils/metrics.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2525
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626

27-
from typing import Dict, Union
27+
from typing import Dict, List, Union
2828

2929
import triton_python_backend_utils as pb_utils
3030
from vllm.engine.metrics import StatLoggerBase as VllmStatLoggerBase
@@ -46,6 +46,16 @@ def __init__(self, labels):
4646
description="Number of generation tokens processed.",
4747
kind=pb_utils.MetricFamily.COUNTER,
4848
)
49+
self.histogram_time_to_first_token_family = pb_utils.MetricFamily(
50+
name="vllm:time_to_first_token_seconds",
51+
description="Histogram of time to first token in seconds.",
52+
kind=pb_utils.MetricFamily.HISTOGRAM,
53+
)
54+
self.histogram_time_per_output_token_family = pb_utils.MetricFamily(
55+
name="vllm:time_per_output_token_seconds",
56+
description="Histogram of time per output token in seconds.",
57+
kind=pb_utils.MetricFamily.HISTOGRAM,
58+
)
4959

5060
# Initialize metrics
5161
# Iteration stats
@@ -55,6 +65,49 @@ def __init__(self, labels):
5565
self.counter_generation_tokens = self.counter_generation_tokens_family.Metric(
5666
labels=labels
5767
)
68+
self.histogram_time_to_first_token = (
69+
self.histogram_time_to_first_token_family.Metric(
70+
labels=labels,
71+
buckets=[
72+
0.001,
73+
0.005,
74+
0.01,
75+
0.02,
76+
0.04,
77+
0.06,
78+
0.08,
79+
0.1,
80+
0.25,
81+
0.5,
82+
0.75,
83+
1.0,
84+
2.5,
85+
5.0,
86+
7.5,
87+
10.0,
88+
],
89+
)
90+
)
91+
self.histogram_time_per_output_token = (
92+
self.histogram_time_per_output_token_family.Metric(
93+
labels=labels,
94+
buckets=[
95+
0.01,
96+
0.025,
97+
0.05,
98+
0.075,
99+
0.1,
100+
0.15,
101+
0.2,
102+
0.3,
103+
0.4,
104+
0.5,
105+
0.75,
106+
1.0,
107+
2.5,
108+
],
109+
)
110+
)
58111

59112

60113
class VllmStatLogger(VllmStatLoggerBase):
@@ -82,6 +135,19 @@ def _log_counter(self, counter, data: Union[int, float]) -> None:
82135
if data != 0:
83136
counter.increment(data)
84137

138+
def _log_histogram(self, histogram, data: Union[List[int], List[float]]) -> None:
139+
"""Convenience function for logging list to histogram.
140+
141+
Args:
142+
histogram: A histogram metric instance.
143+
data: A list of int or float data to observe into the histogram metric.
144+
145+
Returns:
146+
None
147+
"""
148+
for datum in data:
149+
histogram.observe(datum)
150+
85151
def log(self, stats: VllmStats) -> None:
86152
"""Report stats to Triton metrics server.
87153
@@ -97,3 +163,10 @@ def log(self, stats: VllmStats) -> None:
97163
self._log_counter(
98164
self.metrics.counter_generation_tokens, stats.num_generation_tokens_iter
99165
)
166+
self._log_histogram(
167+
self.metrics.histogram_time_to_first_token, stats.time_to_first_tokens_iter
168+
)
169+
self._log_histogram(
170+
self.metrics.histogram_time_per_output_token,
171+
stats.time_per_output_tokens_iter,
172+
)

0 commit comments

Comments
 (0)