2 files changed (+40 −1)

ci/L0_backend_vllm/metrics_test
@@ -114,7 +114,7 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID

-# Test vLLM metrics reporting with parameter "REPORT_METRICS" set to "no" in config.pbtxt
+# Test vLLM metrics reporting with parameter "REPORT_METRICS" set to "yes" in config.pbtxt
 cp ${SAMPLE_MODELS_REPO}/vllm_model/config.pbtxt models/vllm_opt
 echo -e "
 parameters: {
@@ -149,6 +149,35 @@ else
 fi
 set -e

+kill $SERVER_PID
+wait $SERVER_PID
+
+# Test enabling vLLM metrics reporting in config.pbtxt while disabling in server option
+SERVER_ARGS="${SERVER_ARGS} --allow-metrics=false"
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_refused -v > $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY VLLMTritonMetricsTest.test_vllm_metrics_refused FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
 kill $SERVER_PID
 wait $SERVER_PID
 rm -rf "./models" "temp.json"
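The added shell block restarts Triton with --allow-metrics=false while the model's config.pbtxt still requests metrics via REPORT_METRICS, then runs the new VLLMTritonMetricsTest.test_vllm_metrics_refused case (added in the Python test file below), which expects the metrics endpoint to be unreachable. A minimal sketch of that expectation follows; the port 8002 (Triton's default metrics port) and the helper name are illustrative assumptions, not taken from this PR.

# Minimal sketch (not part of the PR): with --allow-metrics=false, a request to
# the metrics endpoint is expected to fail at the connection level.
import requests

def assert_metrics_refused(url="http://localhost:8002/metrics"):
    try:
        requests.get(url, timeout=5)
    except requests.exceptions.ConnectionError:
        return  # expected: the metrics endpoint is disabled
    raise AssertionError("metrics endpoint unexpectedly reachable")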
@@ -137,6 +137,16 @@ def test_vllm_metrics_disabled(self):
         # No vLLM metric found
         self.assertEqual(len(metrics_dict), 0)

+    def test_vllm_metrics_refused(self):
+        # Test vLLM metrics
+        self.vllm_infer(
+            prompts=self.prompts,
+            sampling_parameters=self.sampling_parameters,
+            model_name=self.vllm_model_name,
+        )
+        with self.assertRaises(requests.exceptions.ConnectionError):
+            self.get_vllm_metrics()
+
     def tearDown(self):
         self.triton_client.close()

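The new test_vllm_metrics_refused case runs one inference and then expects get_vllm_metrics() to raise requests.exceptions.ConnectionError, because the server in this phase was started with metrics disabled. The helper's body is not shown in this diff; a hypothetical sketch of such a scrape, assuming Prometheus text output on the default port 8002, could look like this.

# Hypothetical sketch; the real get_vllm_metrics() in the test class may differ.
# Assumes Prometheus text-format metrics served on localhost:8002.
import requests

def scrape_vllm_metrics(url="http://localhost:8002/metrics"):
    text = requests.get(url, timeout=5).text
    metrics = {}
    for line in text.splitlines():
        # keep only vLLM series, e.g. "vllm:prompt_tokens_total ... 10"
        if line.startswith("vllm:"):
            name, _, value = line.rpartition(" ")
            metrics[name] = float(value)
    return metrics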