1 file changed: +19 −3 lines changed
 
 from torchchat.cli.download import is_model_downloaded, load_model_configs
 from torchchat.generate import Generator, GeneratorArgs
+from torchchat.model import FlamingoModel
 
 from torchchat.utils.build_utils import device_sync
 
@@ -363,9 +364,24 @@ def chunked_completion(self, completion_request: CompletionRequest):
 
         device_sync(device=self.builder_args.device)
 
-        encoded, batch = self._gen_model_inputs_from_openai_completion_request(
-            completion_request
-        )
+        # If the underlying model is Llama 3.2 11B, use unified processing
+        if isinstance(self.model, FlamingoModel):
+            encoded, batch = self._gen_model_inputs_from_openai_completion_request(
+                completion_request
+            )
+        else:
+            # Otherwise use the legacy formatting logic
+            tokens = self.chat_formatter.encode_dialog_prompt(
+                dialog=[
+                    {"role": message["role"], "content": message["content"]}
+                    for message in completion_request.messages
+                ]
+            )
+            print("tokens:", self.tokenizer.decode(tokens), flush=True)
+            encoded = torch.tensor(
+                tokens, dtype=torch.int, device=self.builder_args.device
+            )
+            batch = None
 
         idx = 0
         start_pos = 0
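
For context, here is a minimal, self-contained sketch of the legacy text-only branch kept in the else arm above: flatten the OpenAI-style messages into a dialog, tokenize it, and wrap the token ids in an int tensor with batch left as None. The encode_dialog_prompt stand-in below is hypothetical and for illustration only; the real path goes through torchchat's chat_formatter.encode_dialog_prompt and self.tokenizer, which are not reproduced here.

import torch

def encode_dialog_prompt(dialog):
    # Stand-in for chat_formatter.encode_dialog_prompt; real
    # tokenization is model-specific and inserts chat-template tokens.
    text = "".join(f"<|{m['role']}|>{m['content']}" for m in dialog)
    return list(text.encode("utf-8"))

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

tokens = encode_dialog_prompt(
    [{"role": m["role"], "content": m["content"]} for m in messages]
)
encoded = torch.tensor(tokens, dtype=torch.int)  # CPU here; the diff uses builder_args.device
batch = None  # the text-only path carries no multimodal batch

print(encoded.shape)  # 1-D tensor of token ids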