2 files changed: +6 -10 lines changed
ci/L0_backend_vllm/metrics_test
@@ -70,12 +70,10 @@ def get_metrics(self):
70
70
71
71
return vllm_dict
72
72
73
- def vllm_async_stream_infer (
73
+ def vllm_infer (
74
74
self ,
75
75
prompts ,
76
76
sampling_parameters ,
77
- stream ,
78
- send_parameters_as_tensor ,
79
77
model_name ,
80
78
):
81
79
"""
@@ -89,15 +87,15 @@ def vllm_async_stream_infer(
89
87
request_data = create_vllm_request (
90
88
prompts [i ],
91
89
i ,
92
- stream ,
90
+ False ,
93
91
sampling_parameters ,
94
92
model_name ,
95
- send_parameters_as_tensor ,
93
+ True ,
96
94
)
97
95
self .triton_client .async_stream_infer (
98
96
model_name = model_name ,
99
- request_id = request_data ["request_id" ],
100
97
inputs = request_data ["inputs" ],
98
+ request_id = request_data ["request_id" ],
101
99
outputs = request_data ["outputs" ],
102
100
parameters = sampling_parameters ,
103
101
)
@@ -121,11 +119,9 @@ def test_vllm_metrics(self):
121
119
}
122
120
123
121
# Test vLLM metrics
124
- self .vllm_async_stream_infer (
122
+ self .vllm_infer (
125
123
prompts = self .prompts ,
126
124
sampling_parameters = self .sampling_parameters ,
127
- stream = False ,
128
- send_parameters_as_tensor = True ,
129
125
model_name = self .vllm_model_name ,
130
126
)
131
127
expected_metrics_dict ["vllm:prompt_tokens_total" ] = 18
@@ -168,7 +168,7 @@ def init_engine(self):
168
168
)
169
169
170
170
# Add vLLM custom metrics
171
- if not self .metrics :
171
+ if self .metrics :
172
172
self .llm_engine .add_logger ("triton" , self .metrics )
173
173
174
174
def setup_lora (self ):
You can’t perform that action at this time.
0 commit comments