@@ -119,39 +119,60 @@ def test_vllm_metrics(self):
             model_name=self.vllm_model_name,
         )
         metrics_dict = self.parse_vllm_metrics()
+        total_prompts = len(self.prompts)
 
         # vllm:prompt_tokens_total
         self.assertEqual(metrics_dict["vllm:prompt_tokens_total"], 18)
         # vllm:generation_tokens_total
         self.assertEqual(metrics_dict["vllm:generation_tokens_total"], 48)
         # vllm:time_to_first_token_seconds
-        self.assertEqual(metrics_dict["vllm:time_to_first_token_seconds_count"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:time_to_first_token_seconds_count"], total_prompts
+        )
         self.assertGreater(metrics_dict["vllm:time_to_first_token_seconds_sum"], 0)
-        self.assertEqual(metrics_dict["vllm:time_to_first_token_seconds_bucket"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:time_to_first_token_seconds_bucket"], total_prompts
+        )
         # vllm:time_per_output_token_seconds
         self.assertEqual(metrics_dict["vllm:time_per_output_token_seconds_count"], 45)
         self.assertGreater(metrics_dict["vllm:time_per_output_token_seconds_sum"], 0)
         self.assertEqual(metrics_dict["vllm:time_per_output_token_seconds_bucket"], 45)
         # vllm:e2e_request_latency_seconds
-        self.assertEqual(metrics_dict["vllm:e2e_request_latency_seconds_count"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:e2e_request_latency_seconds_count"], total_prompts
+        )
         self.assertGreater(metrics_dict["vllm:e2e_request_latency_seconds_sum"], 0)
-        self.assertEqual(metrics_dict["vllm:e2e_request_latency_seconds_bucket"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:e2e_request_latency_seconds_bucket"], total_prompts
+        )
         # vllm:request_prompt_tokens
-        self.assertEqual(metrics_dict["vllm:request_prompt_tokens_count"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:request_prompt_tokens_count"], total_prompts
+        )
         self.assertEqual(metrics_dict["vllm:request_prompt_tokens_sum"], 18)
-        self.assertEqual(metrics_dict["vllm:request_prompt_tokens_bucket"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:request_prompt_tokens_bucket"], total_prompts
+        )
         # vllm:request_generation_tokens
-        self.assertEqual(metrics_dict["vllm:request_generation_tokens_count"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:request_generation_tokens_count"], total_prompts
+        )
         self.assertEqual(metrics_dict["vllm:request_generation_tokens_sum"], 48)
-        self.assertEqual(metrics_dict["vllm:request_generation_tokens_bucket"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:request_generation_tokens_bucket"], total_prompts
+        )
         # vllm:request_params_best_of
-        self.assertEqual(metrics_dict["vllm:request_params_best_of_count"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:request_params_best_of_count"], total_prompts
+        )
         self.assertEqual(metrics_dict["vllm:request_params_best_of_sum"], 3)
-        self.assertEqual(metrics_dict["vllm:request_params_best_of_bucket"], 3)
+        self.assertEqual(
+            metrics_dict["vllm:request_params_best_of_bucket"], total_prompts
+        )
         # vllm:request_params_n
-        self.assertEqual(metrics_dict["vllm:request_params_n_count"], 3)
+        self.assertEqual(metrics_dict["vllm:request_params_n_count"], total_prompts)
         self.assertEqual(metrics_dict["vllm:request_params_n_sum"], 3)
-        self.assertEqual(metrics_dict["vllm:request_params_n_bucket"], 3)
+        self.assertEqual(metrics_dict["vllm:request_params_n_bucket"], total_prompts)
 
     def test_vllm_metrics_disabled(self):
         # Test vLLM metrics
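The assertions above read a flat dict produced by parse_vllm_metrics(), whose body is not part of this hunk. Below is a minimal sketch of such a parser, assuming the server exposes Prometheus-format metrics over HTTP (the URL, port, and requests-based fetch are assumptions, not taken from the patch) and that the prometheus_client package is available. Because histogram bucket samples share a name and the cumulative le="+Inf" bucket is emitted last in the exposition format, the surviving "*_bucket" value equals the total observation count, which is why the test can compare vllm:*_bucket entries against total_prompts.

    # Sketch only: the metrics URL below is a hypothetical default, and this
    # helper is an assumed shape for the parse_vllm_metrics() used above.
    import requests
    from prometheus_client.parser import text_string_to_metric_families

    def parse_vllm_metrics(metrics_url="http://localhost:8002/metrics"):
        response = requests.get(metrics_url)
        response.raise_for_status()

        vllm_dict = {}
        for family in text_string_to_metric_families(response.text):
            for sample in family.samples:
                if not sample.name.startswith("vllm:"):
                    continue
                # Histogram "*_bucket" samples overwrite one another under the
                # same key; the last one parsed is the cumulative le="+Inf"
                # bucket, so the stored value equals the number of observations
                # (one per prompt for the per-request histograms above).
                vllm_dict[sample.name] = sample.value
        return vllm_dict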