24
24
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
25
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
26
27
- from typing import Dict , Union
27
+ from typing import Dict , List , Union
28
28
29
29
import triton_python_backend_utils as pb_utils
30
30
from vllm .engine .metrics import StatLoggerBase as VllmStatLoggerBase
@@ -46,6 +46,16 @@ def __init__(self, labels):
46
46
description = "Number of generation tokens processed." ,
47
47
kind = pb_utils .MetricFamily .COUNTER ,
48
48
)
49
+ self .histogram_time_to_first_token_family = pb_utils .MetricFamily (
50
+ name = "vllm:time_to_first_token_seconds" ,
51
+ description = "Histogram of time to first token in seconds." ,
52
+ kind = pb_utils .MetricFamily .HISTOGRAM ,
53
+ )
54
+ self .histogram_time_per_output_token_family = pb_utils .MetricFamily (
55
+ name = "vllm:time_per_output_token_seconds" ,
56
+ description = "Histogram of time per output token in seconds." ,
57
+ kind = pb_utils .MetricFamily .HISTOGRAM ,
58
+ )
49
59
50
60
# Initialize metrics
51
61
# Iteration stats
@@ -55,6 +65,49 @@ def __init__(self, labels):
55
65
self .counter_generation_tokens = self .counter_generation_tokens_family .Metric (
56
66
labels = labels
57
67
)
68
+ self .histogram_time_to_first_token = (
69
+ self .histogram_time_to_first_token_family .Metric (
70
+ labels = labels ,
71
+ buckets = [
72
+ 0.001 ,
73
+ 0.005 ,
74
+ 0.01 ,
75
+ 0.02 ,
76
+ 0.04 ,
77
+ 0.06 ,
78
+ 0.08 ,
79
+ 0.1 ,
80
+ 0.25 ,
81
+ 0.5 ,
82
+ 0.75 ,
83
+ 1.0 ,
84
+ 2.5 ,
85
+ 5.0 ,
86
+ 7.5 ,
87
+ 10.0 ,
88
+ ],
89
+ )
90
+ )
91
+ self .histogram_time_per_output_token = (
92
+ self .histogram_time_per_output_token_family .Metric (
93
+ labels = labels ,
94
+ buckets = [
95
+ 0.01 ,
96
+ 0.025 ,
97
+ 0.05 ,
98
+ 0.075 ,
99
+ 0.1 ,
100
+ 0.15 ,
101
+ 0.2 ,
102
+ 0.3 ,
103
+ 0.4 ,
104
+ 0.5 ,
105
+ 0.75 ,
106
+ 1.0 ,
107
+ 2.5 ,
108
+ ],
109
+ )
110
+ )
58
111
59
112
60
113
class VllmStatLogger (VllmStatLoggerBase ):
@@ -93,6 +146,19 @@ def _log_counter(self, counter, data: Union[int, float]) -> None:
93
146
"""
94
147
if data != 0 :
95
148
counter .increment (data )
149
+
150
+ def _log_histogram (self , histogram , data : Union [List [int ], List [float ]]) -> None :
151
+ """Convenience function for logging list to histogram.
152
+
153
+ Args:
154
+ histogram: A histogram metric instance.
155
+ data: A list of int or float data to observe into the histogram metric.
156
+
157
+ Returns:
158
+ None
159
+ """
160
+ for datum in data :
161
+ histogram .observe (datum )
96
162
97
163
def log (self , stats : VllmStats ) -> None :
98
164
"""Logs tracked stats to triton metrics server every iteration.
@@ -108,4 +174,10 @@ def log(self, stats: VllmStats) -> None:
108
174
)
109
175
self ._log_counter (
110
176
self .metrics .counter_generation_tokens , stats .num_generation_tokens_iter
177
+ self ._log_histogram (
178
+ self .metrics .histogram_time_to_first_token , stats .time_to_first_tokens_iter
179
+ )
180
+ self ._log_histogram (
181
+ self .metrics .histogram_time_per_output_token ,
182
+ stats .time_per_output_tokens_iter ,
111
183
)
0 commit comments