Fixing race condition in server and partial stream handling in frontend. #2391

Merged · 4 commits · Aug 4, 2023
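
This PR fixes two independent issues: the server released the completion mutex while a streamed response was still being generated, letting a second request race against the active one, and the frontend assumed every read from the SSE stream contained only whole lines, which breaks whenever the network splits a `data:` event across two chunks. A toy illustration of that failure mode and of the buffering strategy the patch adopts (this snippet is illustrative only, not part of the diff):

```js
// Simulated network reads: one SSE event split across two chunks.
const chunks = ['data: {"content":" Hel', 'lo"}\n'];

let leftover = ""; // same carry-over buffer idea as the patched completion.js
for (const chunk of chunks) {
  const text = leftover + chunk;
  const lines = text.split('\n');
  // An incomplete trailing line is held back until the next read completes it.
  leftover = text.endsWith('\n') ? "" : lines.pop();
  for (const line of lines) {
    if (line) console.log('complete event:', line);
  }
}
// Prints exactly once: complete event: data: {"content":" Hello"}
```
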
55 changes: 37 additions & 18 deletions examples/server/public/completion.js
```diff
@@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
   const decoder = new TextDecoder();
 
   let content = "";
+  let leftover = ""; // Buffer for partially read lines
 
   try {
     let cont = true;
@@ -53,29 +54,47 @@
         break;
       }
 
-      // sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
-      // mainly care about the data: key here, which we expect as json
-      const text = decoder.decode(result.value);
+      // Add any leftover data to the current chunk of data
+      const text = leftover + decoder.decode(result.value);
 
-      // parse all sse events and add them to result
-      const regex = /^(\S+):\s(.*)$/gm;
-      for (const match of text.matchAll(regex)) {
-        result[match[1]] = match[2]
-      }
+      // Check if the last character is a line break
+      const endsWithLineBreak = text.endsWith('\n');
 
-      // since we know this is llama.cpp, let's just decode the json in data
-      result.data = JSON.parse(result.data);
-      content += result.data.content;
+      // Split the text into lines
+      let lines = text.split('\n');
 
-      // yield
-      yield result;
+      // If the text doesn't end with a line break, then the last line is incomplete
+      // Store it in leftover to be added to the next chunk of data
+      if (!endsWithLineBreak) {
+        leftover = lines.pop();
+      } else {
+        leftover = ""; // Reset leftover if we have a line break at the end
+      }
 
-      // if we got a stop token from server, we will break here
-      if (result.data.stop) {
-        if (result.data.generation_settings) {
-          generation_settings = result.data.generation_settings;
-        }
-        cont = false;
-        break;
-      }
+      // Parse all sse events and add them to result
+      const regex = /^(\S+):\s(.*)$/gm;
+      for (const line of lines) {
+        const match = regex.exec(line);
+        if (match) {
+          result[match[1]] = match[2]
+          // since we know this is llama.cpp, let's just decode the json in data
+          if (result.data) {
+            result.data = JSON.parse(result.data);
+            content += result.data.content;
+
+            // yield
+            yield result;
+
+            // if we got a stop token from server, we will break here
+            if (result.data.stop) {
+              if (result.data.generation_settings) {
+                generation_settings = result.data.generation_settings;
+              }
+              cont = false;
+              break;
+            }
+          }
+        }
+      }
     }
   } catch (e) {
```
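
For context, the patched generator is consumed roughly as below; this is a minimal sketch that assumes the example server's `/completion` endpoint and its `n_predict` parameter, neither of which appears in this diff. The point of the fix is that each `for await` iteration now corresponds to exactly one complete `data:` line, however the transport chunked the stream:

```js
import { llama } from './completion.js';

let output = "";
// llama() is the async generator patched above; each yielded event carries
// event.data, already JSON-parsed from one complete `data:` SSE line.
for await (const event of llama("Building a website can be done in 10 simple steps:", {
  n_predict: 64, // assumed server parameter, shown for illustration
})) {
  output += event.data.content;
}
console.log(output);
```
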
6 changes: 5 additions & 1 deletion examples/server/server.cpp
```diff
@@ -1263,7 +1263,11 @@ int main(int argc, char **argv)
                 sink.done();
                 return true;
             };
-            res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
+            const auto on_complete = [&](bool) {
+                llama.mutex.unlock();
+            };
+            lock.release();
+            res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
         } });
 
     svr.Get("/model.json", [&llama](const Request &, Response &res)
```
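
On the server side, the race was that the handler held `llama.mutex` through a scoped lock that unlocked as soon as the handler returned, while the chunked content provider kept generating tokens afterwards. The patch hands ownership of the lock to the response: `lock.release()` detaches the mutex from the RAII guard without unlocking it, and `on_complete` unlocks once httplib finishes streaming. A standalone sketch of that hand-off pattern, using a stand-in `Server` struct rather than the real server state:

```cpp
#include <functional>
#include <mutex>

struct Server {
    std::mutex mutex; // serializes access to the shared model context
};

// Returns the callback the HTTP library invokes when streaming ends.
std::function<void(bool)> begin_streaming(Server &server) {
    std::unique_lock<std::mutex> lock(server.mutex);

    // The chunked content provider outlives this handler, so the lock must
    // survive the handler's return: release() detaches the mutex from the
    // RAII guard without unlocking it.
    lock.release();

    // The mutex now stays held until the library reports completion, so no
    // second request can touch the context mid-stream.
    return [&server](bool /*success*/) { server.mutex.unlock(); };
}

int main() {
    Server server;
    auto on_complete = begin_streaming(server);
    // ... response streamed via res.set_chunked_content_provider(...) ...
    on_complete(true); // the library calls this once the response is done
}
```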