File tree Expand file tree Collapse file tree 1 file changed +17
-9
lines changed Expand file tree Collapse file tree 1 file changed +17
-9
lines changed Original file line number Diff line number Diff line change @@ -109,6 +109,20 @@ def initialize(self, args):
109
109
)
110
110
self .output_dtype = pb_utils .triton_string_to_numpy (output_config ["data_type" ])
111
111
112
+ # Create vLLM custom metrics
113
+ try :
114
+ labels = {
115
+ "model" : self .args ["model_name" ],
116
+ "version" : self .args ["model_version" ],
117
+ }
118
+ self .metrics = VllmStatLogger (labels = labels )
119
+ except pb_utils .TritonModelException as e :
120
+ if "metrics not supported" in str (e ):
121
+ # Metrics are disabled at the server
122
+ self .metrics = None
123
+ else :
124
+ raise e
125
+
112
126
# Prepare vLLM engine
113
127
self .init_engine ()
114
128
@@ -153,15 +167,9 @@ def init_engine(self):
153
167
AsyncEngineArgs (** self .vllm_engine_config )
154
168
)
155
169
156
- # If TRITON_ENABLE_METRICS<_CPU/_GPU> build flag is enabled.
157
- if self .args ["metrics_mode" ] in ["all" , "cpu" , "gpu" ]:
158
- # Create vLLM custom Metrics
159
- labels = {
160
- "model" : self .args ["model_name" ],
161
- "version" : self .args ["model_version" ],
162
- }
163
- logger = VllmStatLogger (labels = labels )
164
- self .llm_engine .add_logger ("triton" , logger )
170
+ # Add vLLM custom metrics; self.metrics is None when metrics are disabled at the server
171
+ if self .metrics is not None :
172
+ self .llm_engine .add_logger ("triton" , self .metrics )
165
173
166
174
def setup_lora (self ):
167
175
self .enable_lora = False
You can’t perform that action at this time.
0 commit comments