@@ -105,20 +105,20 @@ class CompletionRequest:
     logit_bias: Optional[Dict[str, float]] = None  # unimplemented
     logprobs: Optional[bool] = None  # unimplemented
     top_logprobs: Optional[int] = None  # unimplemented
-    max_tokens: Optional[int] = None  # unimplemented
+    max_tokens: Optional[int] = None
     n: int = 1
     presence_penalty: float = 0  # unimplemented
     response_format: Optional[ResponseFormat] = None  # unimplemented
-    seed: Optional[int] = None  # unimplemented
+    seed: Optional[int] = None
     service_tier: Optional[str] = None  # unimplemented
     stop: Optional[List[str]] = None  # unimplemented
     stream: bool = False
     stream_options: Optional[StreamOptions] = None  # unimplemented
-    temperature: Optional[float] = 1.0  # unimplemented
+    temperature: Optional[float] = 1.0
     top_p: Optional[float] = 1.0  # unimplemented
-    tools: Optional[List[Any]] = None  # unimplemented
-    tool_choice: Optional[Union[str, Any]] = None  # unimplemented
-    parallel_tool_calls: Optional[bool] = None  # unimplemented
+    tools: Optional[List[Any]] = None  # unimplemented - Assistant features
+    tool_choice: Optional[Union[str, Any]] = None  # unimplemented - Assistant features
+    parallel_tool_calls: Optional[bool] = None  # unimplemented - Assistant features
     user: Optional[str] = None  # unimplemented

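With max_tokens, seed, and temperature no longer flagged as unimplemented, a client can set them directly on the request. A minimal sketch, assuming the dataclass also defines a model field (not shown in this hunk; messages is used later in this diff):

# Illustration only; `model` is an assumed field name.
request = CompletionRequest(
    model="llama3",                                         # assumed field
    messages=[{"role": "user", "content": "Hello there"}],
    max_tokens=64,       # now forwarded to generation (see the GeneratorArgs hunk below)
    temperature=0.7,     # now forwarded to generation
    seed=1234,           # no longer marked unimplemented
)

tools, tool_choice, and parallel_tool_calls stay unimplemented and are now grouped under the Assistant-features comment.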
@@ -229,9 +229,8 @@ def __init__(self, *args, **kwargs):
             else self.model.config.max_seq_length
         )
         # The System fingerprint is a unique identifier for the model and its configuration.
-        # Currently, this is not implemented in a
         self.system_fingerprint = (
-            self.builder_args.device + type(self.builder_args.precision).__name__
+            self.builder_args.device + "_" + str(self.builder_args.precision)
         )

     def chunked_completion(self, completion_request: CompletionRequest):
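The old expression used type(...).__name__, which collapses every torch dtype to the literal string "dtype"; the new expression keeps the actual precision and adds a separator. A quick sketch of the difference, assuming builder_args.device is a string such as "cuda" and builder_args.precision is a torch.dtype:

import torch

device = "cuda"
precision = torch.bfloat16

old = device + type(precision).__name__   # "cudadtype" - the precision value is lost
new = device + "_" + str(precision)       # "cuda_torch.bfloat16"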
@@ -270,7 +269,11 @@ def chunked_completion(self, completion_request: CompletionRequest):
         )
         generator_args = GeneratorArgs(
             completion_request.messages[-1].get("content"),
+            max_new_tokens=(
+                completion_request.max_tokens if completion_request.max_tokens else 16
+            ),
             encoded_prompt=encoded,
+            temperature=completion_request.temperature,
             chat_mode=False,
         )
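The max_new_tokens fallback above means a request that omits max_tokens (or passes a falsy value such as 0) is capped at 16 new tokens, which appears to mirror the legacy OpenAI completions default. A small sketch of the behavior, using a hypothetical helper name for illustration:

def resolve_max_new_tokens(max_tokens):
    # Mirrors the expression added in the diff: fall back to 16 when unset (or 0).
    return max_tokens if max_tokens else 16

assert resolve_max_new_tokens(None) == 16
assert resolve_max_new_tokens(0) == 16     # falsy values also trigger the fallback
assert resolve_max_new_tokens(256) == 256

temperature is passed through as-is; since the dataclass defaults it to 1.0, requests that do not set it keep the previous sampling behavior.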