@@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
43
43
const decoder = new TextDecoder ( ) ;
44
44
45
45
let content = "" ;
46
+ let leftover = "" ; // Buffer for partially read lines
46
47
47
48
try {
48
49
let cont = true ;
@@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
53
54
break ;
54
55
}
55
56
56
- // sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
57
- // mainly care about the data: key here, which we expect as json
58
- const text = decoder . decode ( result . value ) ;
57
+ // Add any leftover data to the current chunk of data
58
+ const text = leftover + decoder . decode ( result . value ) ;
59
59
60
- // parse all sse events and add them to result
61
- const regex = / ^ ( \S + ) : \s ( .* ) $ / gm;
62
- for ( const match of text . matchAll ( regex ) ) {
63
- result [ match [ 1 ] ] = match [ 2 ]
64
- }
60
+ // Check if the last character is a line break
61
+ const endsWithLineBreak = text . endsWith ( '\n' ) ;
65
62
66
- // since we know this is llama.cpp, let's just decode the json in data
67
- result . data = JSON . parse ( result . data ) ;
68
- content += result . data . content ;
63
+ // Split the text into lines
64
+ let lines = text . split ( '\n' ) ;
69
65
70
- // yield
71
- yield result ;
66
+ // If the text doesn't end with a line break, then the last line is incomplete
67
+ // Store it in leftover to be added to the next chunk of data
68
+ if ( ! endsWithLineBreak ) {
69
+ leftover = lines . pop ( ) ;
70
+ } else {
71
+ leftover = "" ; // Reset leftover if we have a line break at the end
72
+ }
72
73
73
- // if we got a stop token from server, we will break here
74
- if ( result . data . stop ) {
75
- if ( result . data . generation_settings ) {
76
- generation_settings = result . data . generation_settings ;
74
+ // Parse all sse events and add them to result
75
+ const regex = / ^ ( \S + ) : \s ( .* ) $ / gm;
76
+ for ( const line of lines ) {
77
+ const match = regex . exec ( line ) ;
78
+ if ( match ) {
79
+ result [ match [ 1 ] ] = match [ 2 ]
80
+ // since we know this is llama.cpp, let's just decode the json in data
81
+ if ( result . data ) {
82
+ result . data = JSON . parse ( result . data ) ;
83
+ content += result . data . content ;
84
+
85
+ // yield
86
+ yield result ;
87
+
88
+ // if we got a stop token from server, we will break here
89
+ if ( result . data . stop ) {
90
+ if ( result . data . generation_settings ) {
91
+ generation_settings = result . data . generation_settings ;
92
+ }
93
+ cont = false ;
94
+ break ;
95
+ }
96
+ }
77
97
}
78
- break ;
79
98
}
80
99
}
81
100
} catch ( e ) {
0 commit comments