Skip to content

Commit 5f631c2

Browse files
authored
Fixing race condition in server and partial stream handling in frontend. (#2391)
* Fixing race condition in server.cpp and partial stream handling in completion.js
* Reverting assert edits.
* Adding newline to eof
1 parent 415e99f commit 5f631c2

File tree

2 files changed

+42
-19
lines changed

2 files changed

+42
-19
lines changed

examples/server/public/completion.js

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
4343
const decoder = new TextDecoder();
4444

4545
let content = "";
46+
let leftover = ""; // Buffer for partially read lines
4647

4748
try {
4849
let cont = true;
@@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
5354
break;
5455
}
5556

56-
// sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
57-
// mainly care about the data: key here, which we expect as json
58-
const text = decoder.decode(result.value);
57+
// Add any leftover data to the current chunk of data
58+
const text = leftover + decoder.decode(result.value);
5959

60-
// parse all sse events and add them to result
61-
const regex = /^(\S+):\s(.*)$/gm;
62-
for (const match of text.matchAll(regex)) {
63-
result[match[1]] = match[2]
64-
}
60+
// Check if the last character is a line break
61+
const endsWithLineBreak = text.endsWith('\n');
6562

66-
// since we know this is llama.cpp, let's just decode the json in data
67-
result.data = JSON.parse(result.data);
68-
content += result.data.content;
63+
// Split the text into lines
64+
let lines = text.split('\n');
6965

70-
// yield
71-
yield result;
66+
// If the text doesn't end with a line break, then the last line is incomplete
67+
// Store it in leftover to be added to the next chunk of data
68+
if (!endsWithLineBreak) {
69+
leftover = lines.pop();
70+
} else {
71+
leftover = ""; // Reset leftover if we have a line break at the end
72+
}
7273

73-
// if we got a stop token from server, we will break here
74-
if (result.data.stop) {
75-
if (result.data.generation_settings) {
76-
generation_settings = result.data.generation_settings;
74+
// Parse all sse events and add them to result
75+
const regex = /^(\S+):\s(.*)$/gm;
76+
for (const line of lines) {
77+
const match = regex.exec(line);
78+
if (match) {
79+
result[match[1]] = match[2]
80+
// since we know this is llama.cpp, let's just decode the json in data
81+
if (result.data) {
82+
result.data = JSON.parse(result.data);
83+
content += result.data.content;
84+
85+
// yield
86+
yield result;
87+
88+
// if we got a stop token from server, we will break here
89+
if (result.data.stop) {
90+
if (result.data.generation_settings) {
91+
generation_settings = result.data.generation_settings;
92+
}
93+
cont = false;
94+
break;
95+
}
96+
}
7797
}
78-
break;
7998
}
8099
}
81100
} catch (e) {

examples/server/server.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1274,7 +1274,11 @@ int main(int argc, char **argv)
12741274
sink.done();
12751275
return true;
12761276
};
1277-
res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
1277+
const auto on_complete = [&](bool) {
1278+
llama.mutex.unlock();
1279+
};
1280+
lock.release();
1281+
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
12781282
} });
12791283

12801284
svr.Get("/model.json", [&llama](const Request &, Response &res)

0 commit comments

Comments (0)