Skip to content

Commit 8af3a99

Browse files
committed
Merge branch 'master' into gguf
2 parents 6f14854 + d783f79 commit 8af3a99

File tree

13 files changed

+3271
-2602
lines changed

13 files changed

+3271
-2602
lines changed

examples/server/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Command line options:
1616
- `--memory-f32`: Use 32-bit floats instead of 16-bit floats for memory key+value. Not recommended.
1717
- `--mlock`: Lock the model in memory, preventing it from being swapped out when memory-mapped.
1818
- `--no-mmap`: Do not memory-map the model. By default, models are mapped into memory, which allows the system to load only the necessary parts of the model as needed.
19+
- `--numa`: Attempt optimizations that help on some NUMA systems.
1920
- `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains.
2021
- `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation.
2122
- `-to N`, `--timeout N`: Server read/write timeout in seconds. Default `600`.

examples/server/chat.mjs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,34 @@
11
import * as readline from 'node:readline'
22
import { stdin, stdout } from 'node:process'
3+
import { readFileSync } from 'node:fs'
4+
import { SchemaConverter } from './public/json-schema-to-grammar.mjs'
5+
// Command-line arguments after `node chat.mjs`.
const args = process.argv.slice(2);

/**
 * Returns the argument that immediately follows `flag`, or `undefined` when
 * the flag is absent or is the last argument.
 */
const argValue = (flag) => args.find((_, index) => args[index - 1] === flag);

// Path to a JSON Schema file that is converted into a grammar.
const grammarJsonSchemaFile = argValue("--grammar-json-schema");
// Path to a ready-made grammar file; takes precedence over the schema file.
const grammarFile = argValue("--grammar");

// Example usage: function,arguments
const grammarJsonSchemaPropOrder = argValue("--grammar-json-schema-prop-order");

// Map of property name -> position. Names are trimmed and blank entries are
// dropped so that "function, arguments" behaves like "function,arguments",
// matching how the web UI parses the same option.
const propOrder = grammarJsonSchemaPropOrder
  ? Object.fromEntries(
      grammarJsonSchemaPropOrder
        .split(",")
        .map((propName) => propName.trim())
        .filter((propName) => propName !== "")
        .map((propName, index) => [propName, index])
    )
  : {};

// Grammar text sent with every completion request; null when none was given.
let grammar = null;
if (grammarJsonSchemaFile) {
  const schema = JSON.parse(readFileSync(grammarJsonSchemaFile, 'utf-8'));
  const converter = new SchemaConverter(propOrder);
  converter.visit(schema, '');
  grammar = converter.formatGrammar();
}
if (grammarFile) {
  // An explicit grammar file overrides a grammar derived from a schema.
  grammar = readFileSync(grammarFile, 'utf-8');
}
332

433
const API_URL = 'http://127.0.0.1:8080'
534

@@ -48,6 +77,7 @@ async function chat_completion(question) {
4877
n_keep: n_keep,
4978
n_predict: 256,
5079
stop: ["\n### Human:"], // stop completion after generating this
80+
grammar,
5181
stream: true,
5282
})
5383
})

examples/server/index.html.hpp

Lines changed: 1000 additions & 892 deletions
Large diffs are not rendered by default.

examples/server/index.js.hpp

Lines changed: 1704 additions & 1679 deletions
Large diffs are not rendered by default.

examples/server/json-schema-to-grammar.mjs.hpp

Lines changed: 311 additions & 0 deletions
Large diffs are not rendered by default.

examples/server/public/index.html

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@
141141
} from '/index.js';
142142

143143
import { llama } from '/completion.js';
144+
import { SchemaConverter } from '/json-schema-to-grammar.mjs';
144145

145146
const session = signal({
146147
prompt: "This is a conversation between user and llama, a friendly chatbot. respond in simple markdown.",
@@ -166,6 +167,7 @@
166167
mirostat: 0, // 0/1/2
167168
mirostat_tau: 5, // target entropy
168169
mirostat_eta: 0.1, // learning rate
170+
grammar: '',
169171
})
170172

171173
const llamaStats = signal(null)
@@ -304,6 +306,26 @@
304306
const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) }
305307
const updateParamsInt = (el) => params.value = { ...params.value, [el.target.name]: Math.floor(parseFloat(el.target.value)) }
306308

// Raw text of the "prop-order" input: comma-separated property names.
const grammarJsonSchemaPropOrder = signal('')
const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value

// Treats the current grammar field as a JSON Schema and replaces it with the
// grammar generated from that schema. Any failure (invalid JSON, unsupported
// schema) is reported to the user and leaves the field untouched.
const convertJSONSchemaGrammar = () => {
  try {
    const schema = JSON.parse(params.value.grammar)
    // Property name -> position. Blank names (empty field, stray commas) are
    // dropped so they do not end up as a bogus '' key in the order map.
    const propOrder = Object.fromEntries(
      grammarJsonSchemaPropOrder.value
        .split(',')
        .map((propName) => propName.trim())
        .filter((propName) => propName !== '')
        .map((propName, i) => [propName, i])
    )
    const converter = new SchemaConverter(propOrder)
    converter.visit(schema, '')
    params.value = {
      ...params.value,
      grammar: converter.formatGrammar(),
    }
  } catch (e) {
    alert(`Convert failed: ${e.message}`)
  }
}
328+
307329
const FloatField = ({label, max, min, name, step, value}) => {
308330
return html`
309331
<div>
@@ -355,6 +377,13 @@
355377
<label for="template">Chat history template</label>
356378
<textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
357379
</div>
380+
381+
<div>
382+
<label for="grammar">Grammar</label>
383+
<textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
384+
<input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
385+
<button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
386+
</div>
358387
</fieldset>
359388
360389
<fieldset class="two">

examples/server/public/index.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
/** Rule body for `space`: an optional single blank allowed after each token. */
const SPACE_RULE = '" "?';

/**
 * Grammar rule bodies for the JSON Schema primitive types, keyed by the
 * schema's `type` value. The line breaks inside `string` are part of the
 * emitted rule text.
 */
const PRIMITIVE_RULES = {
  boolean: '("true" | "false") space',
  number: '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
  integer: '("-"? ([0-9] | [1-9] [0-9]*)) space',
  string: ` "\\"" (
[^"\\\\] |
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\\"" space`,
  null: '"null" space',
};

/** Any run of characters that is not a digit, ASCII letter or dash. */
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;

/**
 * Characters that must be escaped inside a quoted grammar literal.
 * The backslash is included because `JSON.stringify` emits backslash escapes
 * (`\"`, `\\`, `\n`, `\u0001`, ...) that have to reach the output verbatim;
 * left unescaped they would be consumed as escapes of the grammar itself.
 */
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"\\]/g;
const GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '\\': '\\\\'};

/**
 * Converts a JSON Schema into grammar rules.
 *
 * Usage: construct, call `visit(schema, '')` once for the top-level schema,
 * then read the result with `formatGrammar()`.
 */
export class SchemaConverter {
  /**
   * @param {Object<string, number>} [propOrder] Map of property name to
   *   position; listed properties are emitted first, in that order.
   */
  constructor(propOrder) {
    this._propOrder = propOrder || {};
    // Insertion-ordered map of rule name -> rule body.
    this._rules = new Map();
    this._rules.set('space', SPACE_RULE);
  }

  /**
   * Builds a quoted grammar literal that matches the JSON text of `literal`.
   *
   * @param {*} literal Any JSON-serializable value.
   * @returns {string} The literal, wrapped in double quotes and escaped.
   */
  _formatLiteral(literal) {
    const escaped = JSON.stringify(literal).replace(
      GRAMMAR_LITERAL_ESCAPE_RE,
      (ch) => GRAMMAR_LITERAL_ESCAPES[ch]
    );
    return `"${escaped}"`;
  }

  /**
   * Registers a rule and returns the name it was stored under.
   *
   * Invalid characters in `name` are replaced with dashes. If the name is
   * already taken by an identical rule it is reused; if it is taken by a
   * different rule, the first free `<name><n>` (n = 0, 1, ...) is used.
   *
   * @param {string} name Requested rule name.
   * @param {string} rule Rule body.
   * @returns {string} The rule name actually used.
   */
  _addRule(name, rule) {
    const escName = name.replace(INVALID_RULE_CHARS_RE, '-');
    let key = escName;

    if (this._rules.has(escName)) {
      if (this._rules.get(escName) === rule) {
        return key;
      }

      let i = 0;
      while (this._rules.has(`${escName}${i}`)) {
        i += 1;
      }
      key = `${escName}${i}`;
    }

    this._rules.set(key, rule);
    return key;
  }

  /**
   * Recursively adds the rules needed for `schema`.
   *
   * Supported: `oneOf` / `anyOf`, `const`, `enum`, objects with `properties`,
   * arrays with `items`, and the primitive types in `PRIMITIVE_RULES`.
   *
   * @param {object} schema JSON Schema node.
   * @param {string} name Name prefix for this node; pass '' for the top
   *   level, which is stored as `root`.
   * @returns {string} Name of the rule that matches `schema`.
   * @throws {Error} If the schema is not one of the supported shapes.
   */
  visit(schema, name) {
    const schemaType = schema.type;
    const ruleName = name || 'root';
    // Name for a nested node: "<name>-<suffix>", or just the suffix at the top.
    const childName = (suffix) => `${name}${name ? "-" : ""}${suffix}`;

    if (schema.oneOf || schema.anyOf) {
      const rule = (schema.oneOf || schema.anyOf)
        .map((altSchema, i) => this.visit(altSchema, childName(i)))
        .join(' | ');

      return this._addRule(ruleName, rule);
    } else if ('const' in schema) {
      return this._addRule(ruleName, this._formatLiteral(schema.const));
    } else if ('enum' in schema) {
      const rule = schema.enum.map(v => this._formatLiteral(v)).join(' | ');
      return this._addRule(ruleName, rule);
    } else if (schemaType === 'object' && 'properties' in schema) {
      // TODO: `required` keyword (from python implementation)
      const propOrder = this._propOrder;
      const propPairs = Object.entries(schema.properties).sort((a, b) => {
        // sort by position in prop_order (if specified) then by key
        const orderA = typeof propOrder[a[0]] === 'number' ? propOrder[a[0]] : Infinity;
        const orderB = typeof propOrder[b[0]] === 'number' ? propOrder[b[0]] : Infinity;
        // Infinity - Infinity is NaN (falsy), so unordered keys fall through
        // to the alphabetical comparison.
        return orderA - orderB || a[0].localeCompare(b[0]);
      });

      let rule = '"{" space';
      propPairs.forEach(([propName, propSchema], i) => {
        const propRuleName = this.visit(propSchema, childName(propName));
        if (i > 0) {
          rule += ' "," space';
        }
        rule += ` ${this._formatLiteral(propName)} space ":" space ${propRuleName}`;
      });
      rule += ' "}" space';

      return this._addRule(ruleName, rule);
    } else if (schemaType === 'array' && 'items' in schema) {
      // TODO `prefixItems` keyword (from python implementation)
      const itemRuleName = this.visit(schema.items, childName('item'));
      const rule = `"[" space (${itemRuleName} ("," space ${itemRuleName})*)? "]" space`;
      return this._addRule(ruleName, rule);
    } else {
      // Own-property check: a plain lookup would accept inherited keys such as
      // "constructor", and a non-string type (e.g. an array) would be coerced.
      if (
        typeof schemaType !== 'string' ||
        !Object.prototype.hasOwnProperty.call(PRIMITIVE_RULES, schemaType)
      ) {
        throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
      }
      // Nested primitives share one rule named after the type.
      return this._addRule(
        ruleName === 'root' ? 'root' : schemaType,
        PRIMITIVE_RULES[schemaType]
      );
    }
  }

  /**
   * @returns {string} All registered rules, one `name ::= body` per line, in
   *   the order they were added.
   */
  formatGrammar() {
    let grammar = '';
    this._rules.forEach((rule, name) => {
      grammar += `${name} ::= ${rule}\n`;
    });
    return grammar;
  }
}

examples/server/server.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
666666
{
667667
fprintf(stdout, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
668668
}
669+
fprintf(stdout, " --numa attempt optimizations that help on some NUMA systems\n");
669670
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
670671
fprintf(stdout, " -ngl N, --n-gpu-layers N\n");
671672
fprintf(stdout, " number of layers to store in VRAM\n");
@@ -940,6 +941,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
940941
{
941942
params.use_mmap = false;
942943
}
944+
else if (arg == "--numa")
945+
{
946+
params.numa = true;
947+
}
943948
else if (arg == "--embedding")
944949
{
945950
params.embedding = true;

0 commit comments

Comments
 (0)