
Commit 501f74d

fix: Fix L0_backend_vllm* jobs (#62)
1 parent 98947a7 commit 501f74d

File tree

2 files changed (+79, -143 lines)


ci/L0_backend_vllm/metrics_test/test.sh

Lines changed: 47 additions & 128 deletions
@@ -49,36 +49,42 @@ function copy_model_repository {
     sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.4/' models/vllm_opt/1/model.json
 }
 
-RET=0
-
-# Test disabling vLLM metrics reporting without parameter "REPORT_CUSTOM_METRICS" in config.pbtxt
-copy_model_repository
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
+run_test() {
+    local TEST_CASE=$1
+
+    run_server
+    if [ "$SERVER_PID" == "0" ]; then
+        cat $SERVER_LOG
+        echo -e "\n***\n*** Failed to start $SERVER\n***"
+        exit 1
+    fi
 
-set +e
-python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled -v > $CLIENT_LOG 2>&1
+    set +e
+    python3 $CLIENT_PY $TEST_CASE -v > $CLIENT_LOG 2>&1
 
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
     if [ $? -ne 0 ]; then
         cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        echo -e "\n***\n*** Running $CLIENT_PY $TEST_CASE FAILED. \n***"
         RET=1
+    else
+        check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+        if [ $? -ne 0 ]; then
+            cat $CLIENT_LOG
+            echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+            RET=1
+        fi
     fi
-fi
-set -e
+    set -e
 
-kill $SERVER_PID
-wait $SERVER_PID
+    kill $SERVER_PID
+    wait $SERVER_PID
+}
+
+RET=0
+
+# Test disabling vLLM metrics reporting without parameter "REPORT_CUSTOM_METRICS" in config.pbtxt
+copy_model_repository
+run_test VLLMTritonMetricsTest.test_vllm_metrics_disabled
 
 # Test disabling vLLM metrics reporting with parameter "REPORT_CUSTOM_METRICS" set to "no" in config.pbtxt
 copy_model_repository
@@ -90,33 +96,7 @@ parameters: {
   }
 }
 " >> models/vllm_opt/config.pbtxt
-
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
-
-set +e
-python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled -v > $CLIENT_LOG 2>&1
-
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
-        RET=1
-    fi
-fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
+run_test VLLMTritonMetricsTest.test_vllm_metrics_disabled
 
 # Test vLLM metrics reporting with parameter "REPORT_CUSTOM_METRICS" set to "yes" in config.pbtxt
 copy_model_repository
@@ -129,33 +109,22 @@ parameters: {
   }
 }
 " >> models/vllm_opt/config.pbtxt
+run_test VLLMTritonMetricsTest.test_vllm_metrics
 
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
-
-set +e
-python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics -v > $CLIENT_LOG 2>&1
-
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
-        RET=1
-    fi
-fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
+# Test vLLM metrics custom sampling parameters
+# Custom sampling parameters may result in different vLLM output depending
+# on the platform. Therefore, these metrics are tested separately.
+copy_model_repository
+cp ${SAMPLE_MODELS_REPO}/vllm_model/config.pbtxt models/vllm_opt
+echo -e "
+parameters: {
+  key: \"REPORT_CUSTOM_METRICS\"
+  value: {
+    string_value:\"yes\"
+  }
+}
+" >> models/vllm_opt/config.pbtxt
+run_test VLLMTritonMetricsTest.test_custom_sampling_params
 
 # Test enabling vLLM metrics reporting in config.pbtxt but disabling in model.json
 copy_model_repository
@@ -169,33 +138,7 @@ parameters: {
   }
 }
 " >> models/vllm_opt/config.pbtxt
-
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
-
-set +e
-python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled -v > $CLIENT_LOG 2>&1
-
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_disabled FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
-        RET=1
-    fi
-fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
+run_test VLLMTritonMetricsTest.test_vllm_metrics_disabled
 
 # Test enabling vLLM metrics reporting in config.pbtxt while disabling in server option
 copy_model_repository
@@ -208,32 +151,8 @@ parameters: {
 }
 " >> models/vllm_opt/config.pbtxt
 SERVER_ARGS="${SERVER_ARGS} --allow-metrics=false"
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    cat $SERVER_LOG
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    exit 1
-fi
-
-set +e
-python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_refused -v > $CLIENT_LOG 2>&1
-
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_refused FAILED. \n***"
-    RET=1
-else
-    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
-        RET=1
-    fi
-fi
-set -e
+run_test VLLMTritonMetricsTest.test_vllm_metrics_refused
 
-kill $SERVER_PID
-wait $SERVER_PID
 rm -rf "./models" "temp.json"
 
 if [ $RET -eq 1 ]; then
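The refactor above funnels every test case through the single run_test helper, which relies on temporarily lifting errexit so a failing client run is recorded in RET instead of aborting the whole script (the set +e / set -e toggling implies the script runs under set -e). A minimal, self-contained sketch of that pattern; the echo text is illustrative only, not from the script:

#!/bin/bash
set -e                 # fail fast by default

set +e                 # allow the next command to fail without exiting
false                  # stand-in for a failing test command
STATUS=$?              # capture the exit code before anything else runs
set -e                 # restore fail-fast behavior

if [ $STATUS -ne 0 ]; then
    echo "recorded failure (exit code $STATUS), continuing with later tests"
fi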

ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py

Lines changed: 32 additions & 15 deletions
@@ -112,29 +112,25 @@ def vllm_infer(
         self.triton_client.stop_stream()
 
     def test_vllm_metrics(self):
-        # Adding sampling parameters for testing metrics.
-        # Definitions can be found here https://docs.vllm.ai/en/latest/dev/sampling_params.html
-        n, best_of = 2, 4
-        custom_sampling_parameters = self.sampling_parameters.copy()
-        # Changing "temperature" because "best_of" must be 1 when using greedy
-        # sampling, i.e. "temperature": "0".
-        custom_sampling_parameters.update(
-            {"n": str(n), "best_of": str(best_of), "temperature": "1"}
-        )
-
         # Test vLLM metrics
         self.vllm_infer(
             prompts=self.prompts,
-            sampling_parameters=custom_sampling_parameters,
+            sampling_parameters=self.sampling_parameters,
             model_name=self.vllm_model_name,
         )
         metrics_dict = self.parse_vllm_metrics()
         total_prompts = len(self.prompts)
 
         # vllm:prompt_tokens_total
+        # (2, 133, 144, 2702, 3477, 16)
+        # (2, 133, 812, 9, 1470, 16)
+        # (2, 133, 499, 9, 4687, 16)
         self.assertEqual(metrics_dict["vllm:prompt_tokens_total"], 18)
         # vllm:generation_tokens_total
-        self.assertEqual(metrics_dict["vllm:generation_tokens_total"], 188)
+        # (5, 65, 14, 16, 144, 533, 7, 28, 848, 30, 10, 512, 4, 50118, 100, 437)
+        # (5, 812, 9, 5, 1515, 3497, 4, 50118, 50118, 133, 812, 9, 1470, 16, 5, 812)
+        # (11, 5, 1420, 9, 5, 82, 4, 50118, 50118, 133, 499, 9, 4687, 16, 11, 5)
+        self.assertEqual(metrics_dict["vllm:generation_tokens_total"], 48)
         # vllm:time_to_first_token_seconds
         self.assertEqual(
             metrics_dict["vllm:time_to_first_token_seconds_count"], total_prompts
@@ -166,13 +162,34 @@ def test_vllm_metrics(self):
         # vllm:request_generation_tokens
         self.assertEqual(
             metrics_dict["vllm:request_generation_tokens_count"],
-            best_of * total_prompts,
+            total_prompts,
         )
-        self.assertEqual(metrics_dict["vllm:request_generation_tokens_sum"], 188)
+        self.assertEqual(metrics_dict["vllm:request_generation_tokens_sum"], 48)
         self.assertEqual(
             metrics_dict["vllm:request_generation_tokens_bucket"],
-            best_of * total_prompts,
+            total_prompts,
+        )
+
+    def test_custom_sampling_params(self):
+        # Adding sampling parameters for testing metrics.
+        # Definitions can be found here https://docs.vllm.ai/en/latest/dev/sampling_params.html
+        n, best_of = 2, 4
+        custom_sampling_parameters = self.sampling_parameters.copy()
+        # Changing "temperature" because "best_of" must be 1 when using greedy
+        # sampling, i.e. "temperature": "0".
+        custom_sampling_parameters.update(
+            {"n": str(n), "best_of": str(best_of), "temperature": "1"}
+        )
+
+        # Test vLLM metrics
+        self.vllm_infer(
+            prompts=self.prompts,
+            sampling_parameters=custom_sampling_parameters,
+            model_name=self.vllm_model_name,
         )
+        metrics_dict = self.parse_vllm_metrics()
+        total_prompts = len(self.prompts)
+
         # vllm:request_params_best_of
         self.assertEqual(
             metrics_dict["vllm:request_params_best_of_count"], total_prompts
