
Commit 5518efc

Author: Varun Puri

Replace browser UI with Basic Streamlit UI Implementation

1 parent ec55576

7 files changed: +104 −282 lines

README.md
Lines changed: 8 additions & 9 deletions

````diff
@@ -135,22 +135,21 @@ for tok in gen.generate(generator_args):
 
 ### Browser
 
+Use Streamlit to launch an interactive chat with your model. Running the command will automatically open a tab in your browser.
+```
+streamlit run torchchat.py -- browser <model_name> <model_args>
+```
+
+For example, to quantize and chat with LLaMA3:
 [skip default]: begin
 ```
-python3 torchchat.py browser llama3
+streamlit run torchchat.py -- browser llama3 --quantize '{"precision": {"dtype":"float16"}, "executor":{"accelerator":"cpu"}}' --max-new-tokens 256 --compile
 ```
 [skip default]: end
 
 
-*Running on http://127.0.0.1:5000* should be printed out on the
-terminal. Click the link or go to
-[http://127.0.0.1:5000](http://127.0.0.1:5000) on your browser to
-start interacting with it.
 
-Enter some text in the input box, then hit the enter key or click the
-“SEND” button. After a second or two, the text you entered together
-with the generated text will be displayed. Repeat to have a
-conversation.
+
 
 
 
````

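The `--` separator in these commands is load-bearing: `streamlit run` stops parsing its own options at `--` and forwards everything after it to the script, which is how the `browser` subcommand and model arguments reach torchchat's argument parser. A minimal sketch of that forwarding (`echo_args.py` is a hypothetical file, not part of this commit):

```python
# echo_args.py -- run with: streamlit run echo_args.py -- browser llama3
# Streamlit forwards everything after `--` to the script untouched.
import sys

import streamlit as st

st.write("Arguments seen by the script:", sys.argv[1:])  # ['browser', 'llama3']
```
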
browser/browser.py
Lines changed: 81 additions & 33 deletions

```diff
@@ -1,37 +1,85 @@
-import subprocess
-import sys
+import time
+
+import streamlit as st
+from api.api import CompletionRequest, OpenAIAPIGenerator
+
+from build.builder import BuilderArgs, TokenizerArgs
+
+from generate import GeneratorArgs
 
 
 def main(args):
+    builder_args = BuilderArgs.from_args(args)
+    speculative_builder_args = BuilderArgs.from_speculative_args(args)
+    tokenizer_args = TokenizerArgs.from_args(args)
+    generator_args = GeneratorArgs.from_args(args)
+    generator_args.chat_mode = False
+
+    @st.cache_resource
+    def initialize_generator() -> OpenAIAPIGenerator:
+        return OpenAIAPIGenerator(
+            builder_args,
+            speculative_builder_args,
+            tokenizer_args,
+            generator_args,
+            args.profile,
+            args.quantize,
+            args.draft_quantize,
+        )
+
+    gen = initialize_generator()
+
+    st.title("TorchChat")
+
+    # Initialize chat history
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+
+    # Display chat messages from history on app rerun
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+    # Accept user input
+    if prompt := st.chat_input("What is up?"):
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        # Display user message in chat message container
+        with st.chat_message("user"):
+            st.markdown(prompt)
+
+        # Display assistant response in chat message container
+        with st.chat_message("assistant"), st.status(
+            "Generating... ", expanded=True
+        ) as status:
+
+            req = CompletionRequest(
+                model=gen.builder_args.checkpoint_path,
+                prompt=prompt,
+                temperature=generator_args.temperature,
+                messages=[],
+            )
+
+            def unwrap(completion_generator):
+                start = time.time()
+                tokcount = 0
+                for chunk_response in completion_generator:
+                    content = chunk_response.choices[0].delta.content
+                    if not gen.is_llama3_model or content not in set(
+                        gen.tokenizer.special_tokens.keys()
+                    ):
+                        yield content
+                    if content == gen.tokenizer.eos_id():
+                        yield "."
+                    tokcount += 1
+                status.update(
+                    label="Done, averaged {:.2f} tokens/second".format(
+                        tokcount / (time.time() - start)
+                    ),
+                    state="complete",
+                )
+
+            response = st.write_stream(unwrap(gen.completion(req)))
 
-    # Directory Containing the server file "chat_in_browser.py"
-    server_dir = "browser"
-
-    # Look for port from cmd args. Default to 5000 if not found.
-    port = 5000
-    i = 2
-    while i < len(sys.argv):
-        if sys.argv[i] == "--port":
-            if i + 1 < len(sys.argv):
-                # Extract the value and remove '--port' and the value from sys.argv
-                port = sys.argv[i + 1]
-                del sys.argv[i : i + 2]
-            break
-        else:
-            i += 1
-
-    # Construct arguments for the flask app minus 'browser' command
-    # plus '--chat'
-    args_plus_chat = ["'{}'".format(s) for s in sys.argv[1:] if s != "browser"] + [
-        '"--chat"'
-    ]
-    formatted_args = ", ".join(args_plus_chat)
-    command = [
-        "flask",
-        "--app",
-        f"{server_dir}/chat_in_browser:create_app(" + formatted_args + ")",
-        "run",
-        "--port",
-        f"{port}",
-    ]
-    subprocess.run(command)
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": response})
```
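
The new `browser.py` leans on Streamlit's standard chat-app pattern: the script reruns from the top on every interaction, so the expensive generator build is memoized with `@st.cache_resource`, chat history lives in `st.session_state` and is replayed on each rerun, and `st.write_stream` renders the streamed chunks while returning the accumulated text for the history. A self-contained sketch of that pattern, with stand-ins (`load_model`, `stream_reply`) in place of torchchat's generator; `st.write_stream` requires Streamlit ≥ 1.31:

```python
# chat_sketch.py -- run with: streamlit run chat_sketch.py
import time

import streamlit as st


@st.cache_resource  # built once per process, survives Streamlit's script reruns
def load_model() -> str:
    time.sleep(1.0)  # stand-in for the expensive OpenAIAPIGenerator build
    return "toy-model"


def stream_reply(model: str, prompt: str):
    # Stand-in for unwrap(gen.completion(req)): yields text chunks.
    for word in f"[{model}] You said: {prompt}".split():
        yield word + " "
        time.sleep(0.05)


model = load_model()
st.title("Chat pattern demo")

# Streamlit reruns the whole script on every interaction, so chat
# history must be kept in session_state and replayed each run.
if "messages" not in st.session_state:
    st.session_state.messages = []
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Say something"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        # write_stream consumes the generator and returns the full text.
        response = st.write_stream(stream_reply(model, prompt))
    st.session_state.messages.append({"role": "assistant", "content": response})
```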

browser/chat_in_browser.py

Lines changed: 0 additions & 107 deletions
This file was deleted.

browser/static/css/style.css

Lines changed: 0 additions & 96 deletions
This file was deleted.

browser/templates/chat.html

Lines changed: 0 additions & 27 deletions
This file was deleted.
