|
19 | 19 | See https://platform.openai.com/docs/api-reference/chat for the full specification and details.
|
20 | 20 | """
|
21 | 21 |
|
| 22 | +OPENAI_API_DEFAULT_MAX_TOKENS = 16 |
| 23 | + |
22 | 24 | # Message classes and associated objects - see the types of Messages under "Create Chat Completion >>> Request body >>> messages"
|
23 | 25 |
|
24 | 26 |
|
@@ -105,20 +107,20 @@ class CompletionRequest:
|
105 | 107 | logit_bias: Optional[Dict[str, float]] = None # unimplemented
|
106 | 108 | logprobs: Optional[bool] = None # unimplemented
|
107 | 109 | top_logprobs: Optional[int] = None # unimplemented
|
108 |
| - max_tokens: Optional[int] = None # unimplemented |
| 110 | + max_tokens: Optional[int] = None |
109 | 111 | n: int = 1
|
110 | 112 | presence_penalty: float = 0 # unimplemented
|
111 | 113 | response_format: Optional[ResponseFormat] = None # unimplemented
|
112 |
| - seed: Optional[int] = None # unimplemented |
| 114 | + seed: Optional[int] = None |
113 | 115 | service_tier: Optional[str] = None # unimplemented
|
114 | 116 | stop: Optional[List[str]] = None # unimplemented
|
115 | 117 | stream: bool = False
|
116 | 118 | stream_options: Optional[StreamOptions] = None # unimplemented
|
117 |
| - temperature: Optional[float] = 1.0 # unimplemented |
| 119 | + temperature: Optional[float] = 1.0 |
118 | 120 | top_p: Optional[float] = 1.0 # unimplemented
|
119 |
| - tools: Optional[List[Any]] = None # unimplemented |
120 |
| - tool_choice: Optional[Union[str, Any]] = None # unimplemented |
121 |
| - parallel_tool_calls: Optional[bool] = None # unimplemented |
| 121 | + tools: Optional[List[Any]] = None # unimplemented - Assistant features |
| 122 | + tool_choice: Optional[Union[str, Any]] = None # unimplemented - Assistant features |
| 123 | + parallel_tool_calls: Optional[bool] = None # unimplemented - Assistant features |
122 | 124 | user: Optional[str] = None # unimplemented
|
123 | 125 |
|
124 | 126 |
|
@@ -229,9 +231,8 @@ def __init__(self, *args, **kwargs):
|
229 | 231 | else self.model.config.max_seq_length
|
230 | 232 | )
|
231 | 233 | # The system fingerprint is a unique identifier for the model and its configuration; here it is built from the device and precision.
|
232 |
| - # Currently, this is not implemented in a |
233 | 234 | self.system_fingerprint = (
|
234 |
| - self.builder_args.device + type(self.builder_args.precision).__name__ |
| 235 | + f"{self.builder_args.device}_{self.builder_args.precision}" |
235 | 236 | )
|
236 | 237 |
|
237 | 238 | def chunked_completion(self, completion_request: CompletionRequest):
|
@@ -270,7 +271,13 @@ def chunked_completion(self, completion_request: CompletionRequest):
|
270 | 271 | )
|
271 | 272 | generator_args = GeneratorArgs(
|
272 | 273 | completion_request.messages[-1].get("content"),
|
| 274 | + max_new_tokens=( |
| 275 | + int(completion_request.max_tokens) |
| 276 | + if completion_request.max_tokens |
| 277 | + else OPENAI_API_DEFAULT_MAX_TOKENS |
| 278 | + ), |
273 | 279 | encoded_prompt=encoded,
|
| 280 | + temperature=completion_request.temperature, |
274 | 281 | chat_mode=False,
|
275 | 282 | )
|
276 | 283 |
|
|
0 commit comments