@@ -235,78 +235,3 @@ def _callback(self, x, *, buffer, done_generating):
235
235
done_generating = True
236
236
if self .is_llama3_model and x .item () == self .tokenizer .special_tokens ["<|eot_id|>" ]:
237
237
done_generating = True
238
def main(args):
    """Run a minimal Streamlit chat UI backed by a torchchat generator.

    Builds the model/tokenizer/generator configuration from parsed CLI
    ``args``, constructs a (cached) ``OpenAIAPIGenerator``, and renders a
    chat loop: history is kept in ``st.session_state.messages`` and each
    user prompt is answered by streaming completion chunks into the page.

    Args:
        args: argparse.Namespace produced by the torchchat "generate" CLI
            (must carry ``profile``, ``quantize`` and ``draft_quantize``).

    Side effects: renders Streamlit widgets; mutates ``st.session_state``.
    """
    builder_args = BuilderArgs.from_args(args)
    speculative_builder_args = BuilderArgs.from_speculative_args(args)
    tokenizer_args = TokenizerArgs.from_args(args)
    generator_args = GeneratorArgs.from_args(args)
    # The web UI drives the conversation itself; disable the CLI chat loop.
    generator_args.chat_mode = False

    @st.cache_resource
    def initialize_generator() -> OpenAIAPIGenerator:
        # cache_resource ensures the (expensive) model load happens once
        # per server process, not on every Streamlit script rerun.
        return OpenAIAPIGenerator(
            builder_args,
            speculative_builder_args,
            tokenizer_args,
            generator_args,
            args.profile,
            args.quantize,
            args.draft_quantize,
        )

    gen = initialize_generator()

    st.title("Simple chat")

    # Initialize chat history on the first run of the script.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay chat messages from history on app rerun.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input.
    if prompt := st.chat_input("What is up?"):
        # Record and display the user's message.
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Stream the assistant response into the chat container.
        with st.chat_message("assistant"):
            req = CompletionRequest(
                model=gen.builder_args.checkpoint_path,
                prompt=prompt,
                temperature=generator_args.temperature,
                messages=[],
            )

            # Hoisted out of the per-chunk loop: the special-token set is
            # invariant for the whole stream, so build it once instead of
            # on every chunk. An empty set for non-llama3 models preserves
            # the original "always yield" behavior for those models.
            special_tokens = (
                set(gen.tokenizer.special_tokens)
                if gen.is_llama3_model
                else frozenset()
            )

            def unwrap(completion_generator):
                # Flatten OpenAI-style streaming chunks into plain text,
                # dropping llama3 special tokens (e.g. end-of-turn markers).
                for chunk_response in completion_generator:
                    content = chunk_response.choices[0].delta.content
                    if content not in special_tokens:
                        yield content
                # Trailing "." marks the end of the streamed answer.
                # NOTE(review): placed after the loop — the mangled source
                # does not show its indentation; confirm against upstream.
                yield "."

            response = st.write_stream(unwrap(gen.completion(req)))

        # Record the assistant response in the chat history.
        st.session_state.messages.append(
            {"role": "assistant", "content": response}
        )
301
-
302
-
303
-
304
-
305
if __name__ == "__main__":
    # Entry point: reuse the torchchat "generate" CLI surface for this app.
    cli_parser = argparse.ArgumentParser(description="torchchat generate CLI")
    add_arguments_for_verb(cli_parser, "generate")
    parsed_args = cli_parser.parse_args()
    check_args(parsed_args, "generate")
    # arg_init normalizes/augments the parsed namespace before use.
    main(arg_init(parsed_args))
0 commit comments