Skip to content

Commit b1333ce

Browse files
committed
Both args checking
1 parent de8f25b commit b1333ce

File tree

3 files changed

+70
-10
lines changed

3 files changed

+70
-10
lines changed

README.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,9 +224,14 @@ vllm:prompt_tokens_total{model="vllm_model",version="1"} 10
224224
# TYPE vllm:generation_tokens_total counter
225225
vllm:generation_tokens_total{model="vllm_model",version="1"} 16
226226
```
227-
*Note:* The vLLM metrics reporting is disabled by default due to potential
228-
performance slowdowns. To enable vLLM model's metrics reporting, please add
229-
following lines to its config.pbtxt.
227+
To enable the vLLM engine to collect metrics, the "disable_log_stats" option needs to be either false
228+
or left empty (false by default) in [model.json](https://github.com/triton-inference-server/vllm_backend/blob/main/samples/model_repository/vllm_model/1/model.json).
229+
```bash
230+
"disable_log_stats": false
231+
```
232+
*Note:* vLLM metrics are not reported to the Triton metrics server by default
233+
due to potential performance slowdowns. To enable vLLM model's metrics
234+
reporting, please add the following lines to its config.pbtxt as well.
230235
```bash
231236
parameters: {
232237
key: "REPORT_CUSTOM_METRICS"

ci/L0_backend_vllm/metrics_test/test.sh

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,20 @@ SAMPLE_MODELS_REPO="../../../samples/model_repository"
3939
EXPECTED_NUM_TESTS=1
4040

4141
# Helpers =======================================
42-
rm -rf models && mkdir -p models
43-
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
44-
# `vllm_opt` model will be loaded on server start and stay loaded throughout
45-
# unittesting. To ensure that vllm's memory profiler will not error out
46-
# on `vllm_load_test` load, we reduce "gpu_memory_utilization" for `vllm_opt`,
47-
# so that at least 60% of GPU memory was available for other models.
48-
sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.4/' models/vllm_opt/1/model.json
42+
function copy_model_repository {
43+
rm -rf models && mkdir -p models
44+
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
45+
# `vllm_opt` model will be loaded on server start and stay loaded throughout
46+
# unittesting. To ensure that vllm's memory profiler will not error out
47+
# on `vllm_load_test` load, we reduce "gpu_memory_utilization" for `vllm_opt`,
48+
# so that at least 60% of GPU memory was available for other models.
49+
sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.4/' models/vllm_opt/1/model.json
50+
}
4951

5052
RET=0
5153

5254
# Test disabling vLLM metrics reporting without parameter "REPORT_CUSTOM_METRICS" in config.pbtxt
55+
copy_model_repository
5356
run_server
5457
if [ "$SERVER_PID" == "0" ]; then
5558
cat $SERVER_LOG
@@ -78,6 +81,7 @@ kill $SERVER_PID
7881
wait $SERVER_PID
7982

8083
# Test disabling vLLM metrics reporting with parameter "REPORT_CUSTOM_METRICS" set to "no" in config.pbtxt
84+
copy_model_repository
8185
echo -e "
8286
parameters: {
8387
key: \"REPORT_CUSTOM_METRICS\"
@@ -115,6 +119,7 @@ kill $SERVER_PID
115119
wait $SERVER_PID
116120

117121
# Test vLLM metrics reporting with parameter "REPORT_CUSTOM_METRICS" set to "yes" in config.pbtxt
122+
copy_model_repository
118123
cp ${SAMPLE_MODELS_REPO}/vllm_model/config.pbtxt models/vllm_opt
119124
echo -e "
120125
parameters: {
@@ -152,7 +157,56 @@ set -e
152157
kill $SERVER_PID
153158
wait $SERVER_PID
154159

160+
# Test enabling vLLM metrics reporting in config.pbtxt but disabling in model.json
161+
copy_model_repository
162+
jq '. += {"disable_log_stats" : true}' models/vllm_opt/1/model.json > "temp.json"
163+
mv temp.json models/vllm_opt/1/model.json
164+
echo -e "
165+
parameters: {
166+
key: \"REPORT_CUSTOM_METRICS\"
167+
value: {
168+
string_value:\"yes\"
169+
}
170+
}
171+
" >> models/vllm_opt/config.pbtxt
172+
173+
run_server
174+
if [ "$SERVER_PID" == "0" ]; then
175+
cat $SERVER_LOG
176+
echo -e "\n***\n*** Failed to start $SERVER\n***"
177+
exit 1
178+
fi
179+
180+
set +e
181+
python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled -v > $CLIENT_LOG 2>&1
182+
183+
if [ $? -ne 0 ]; then
184+
cat $CLIENT_LOG
185+
echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***"
186+
RET=1
187+
else
188+
check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
189+
if [ $? -ne 0 ]; then
190+
cat $CLIENT_LOG
191+
echo -e "\n***\n*** Test Result Verification FAILED.\n***"
192+
RET=1
193+
fi
194+
fi
195+
set -e
196+
197+
kill $SERVER_PID
198+
wait $SERVER_PID
199+
155200
# Test enabling vLLM metrics reporting in config.pbtxt while disabling in server option
201+
copy_model_repository
202+
echo -e "
203+
parameters: {
204+
key: \"REPORT_CUSTOM_METRICS\"
205+
value: {
206+
string_value:\"yes\"
207+
}
208+
}
209+
" >> models/vllm_opt/config.pbtxt
156210
SERVER_ARGS="${SERVER_ARGS} --allow-metrics=false"
157211
run_server
158212
if [ "$SERVER_PID" == "0" ]; then

src/model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ def init_engine(self):
165165
"REPORT_CUSTOM_METRICS" in self.model_config["parameters"]
166166
and self.model_config["parameters"]["REPORT_CUSTOM_METRICS"]["string_value"]
167167
== "yes"
168+
and not aync_engine_args.disable_log_stats
168169
):
169170
try:
170171
labels = {

0 commit comments

Comments
 (0)