Skip to content

Commit e061017

Browse files
committed
refactor typing + usage
1 parent fc11648 commit e061017

File tree

3 files changed

+95
-69
lines changed

3 files changed

+95
-69
lines changed

packages/gguf/scripts/generate-llm.ts

Lines changed: 32 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -8,28 +8,39 @@ import { writeFileSync } from "node:fs";
88
const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/llama.cpp";
99
const DEST_FILE_PATH = "./src/transformer-llm.ts";
1010
const DEST_COMMON_SOURCE = `
11-
type Attention<TArchitecture extends string> =
12-
& { [K in \`\${TArchitecture}.attention.head_count\`]: number }
13-
& { [K in \`\${TArchitecture}.attention.head_count_kv\`]: number }
14-
& { [K in \`\${TArchitecture}.attention.layer_norm_epsilon\`]: number }
15-
& { [K in \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`]: number }
16-
& { [K in \`\${TArchitecture}.attention.alibi_bias_max\`]: number }
17-
& { [K in \`\${TArchitecture}.attention.clip_kqv\`]: number }
18-
& { [K in \`\${TArchitecture}.attention.use_norm\`]: number };
19-
20-
type Rope<TArchitecture extends LLMArchitecture> =
21-
& { [K in \`\${TArchitecture}.rope.dimension_count\`]: number }
22-
& { [K in \`\${TArchitecture}.rope.freq_base\`]: number }
23-
& { [K in \`\${TArchitecture}.rope.scale\`]: number }
24-
& { [K in \`\${TArchitecture}.rope.scale_linear\`]: number };
25-
26-
type MOE<TArchitecture extends LLMArchitecture> =
27-
& { [K in \`\${TArchitecture}.expert_count\`]: number }
28-
& { [K in \`\${TArchitecture}.expert_used_count\`]: number };
11+
/** This file is auto-generated by generate-llm.ts */
12+
13+
import type { ModelBase, GGUFGeneralInfo } from "./types";
14+
15+
type Attention<TArchitecture extends string> = Record<
16+
| \`\${TArchitecture}.attention.head_count\`
17+
| \`\${TArchitecture}.attention.head_count_kv\`
18+
| \`\${TArchitecture}.attention.layer_norm_epsilon\`
19+
| \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`
20+
| \`\${TArchitecture}.attention.alibi_bias_max\`
21+
| \`\${TArchitecture}.attention.clip_kqv\`
22+
| \`\${TArchitecture}.attention.use_norm\`,
23+
number
24+
>;
25+
26+
type Rope<TArchitecture extends LLMArchitecture> = Record<
27+
| \`\${TArchitecture}.rope.dimension_count\`
28+
| \`\${TArchitecture}.rope.freq_base\`
29+
| \`\${TArchitecture}.rope.scale\`
30+
| \`\${TArchitecture}.rope.scale_linear\`,
31+
number
32+
>;
33+
34+
type MOE<TArchitecture extends LLMArchitecture> = Record<
35+
| \`\${TArchitecture}.expert_count\`
36+
| \`\${TArchitecture}.expert_used_count\`,
37+
number
38+
>;
2939
3040
export type TransformerLLMArchitecture = LLMArchitecture; // type alias
31-
export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture>
32-
& MOE<TArchitecture>
41+
export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture>
42+
& ModelBase<TArchitecture>
43+
& Partial<MOE<TArchitecture>>
3344
& Attention<TArchitecture>
3445
& Rope<TArchitecture>;
3546
@@ -163,15 +174,11 @@ async function main() {
163174
/////////////////////////////////////
164175
// write result to file
165176
const content = [
166-
"/** This file is auto-generated by generate-llm.ts */",
167-
"",
168-
'import type { ModelBase } from "./types";',
169-
"",
177+
DEST_COMMON_SOURCE,
170178
"export const LLM_ARCHITECTURES = [",
171179
...archList.map((a) => `\t${JSON.stringify(a.name)},`),
172180
"] as const;",
173181
"type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];",
174-
DEST_COMMON_SOURCE,
175182
...archList.map((a) => {
176183
let code = `export type ${a.tsName} = TransformerLLMBase<${JSON.stringify(a.name)}>`;
177184
if (a.hparams.length) {

packages/gguf/src/transformer-llm.ts

Lines changed: 39 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,44 @@
11
/** This file is auto-generated by generate-llm.ts */
22

3-
import type { ModelBase } from "./types";
3+
import type { ModelBase, GGUFGeneralInfo } from "./types";
4+
5+
type Attention<TArchitecture extends string> = Record<
6+
| `${TArchitecture}.attention.head_count`
7+
| `${TArchitecture}.attention.head_count_kv`
8+
| `${TArchitecture}.attention.layer_norm_epsilon`
9+
| `${TArchitecture}.attention.layer_norm_rms_epsilon`
10+
| `${TArchitecture}.attention.alibi_bias_max`
11+
| `${TArchitecture}.attention.clip_kqv`
12+
| `${TArchitecture}.attention.use_norm`,
13+
number
14+
>;
15+
16+
type Rope<TArchitecture extends LLMArchitecture> = Record<
17+
| `${TArchitecture}.rope.dimension_count`
18+
| `${TArchitecture}.rope.freq_base`
19+
| `${TArchitecture}.rope.scale`
20+
| `${TArchitecture}.rope.scale_linear`,
21+
number
22+
>;
23+
24+
type MOE<TArchitecture extends LLMArchitecture> = Record<
25+
`${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`,
26+
number
27+
>;
28+
29+
export type TransformerLLMArchitecture = LLMArchitecture; // type alias
30+
export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture> &
31+
ModelBase<TArchitecture> &
32+
Partial<MOE<TArchitecture>> &
33+
Attention<TArchitecture> &
34+
Rope<TArchitecture>;
35+
36+
export enum TransformerLLMPoolingType {
37+
UNSPECIFIED = -1,
38+
NONE = 0,
39+
MEAN = 1,
40+
CLS = 2,
41+
}
442

543
export const LLM_ARCHITECTURES = [
644
"llama",
@@ -37,36 +75,6 @@ export const LLM_ARCHITECTURES = [
3775
"olmo",
3876
] as const;
3977
type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
40-
41-
type Attention<TArchitecture extends string> = { [K in `${TArchitecture}.attention.head_count`]: number } & {
42-
[K in `${TArchitecture}.attention.head_count_kv`]: number;
43-
} & { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number } & {
44-
[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number;
45-
} & { [K in `${TArchitecture}.attention.alibi_bias_max`]: number } & {
46-
[K in `${TArchitecture}.attention.clip_kqv`]: number;
47-
} & { [K in `${TArchitecture}.attention.use_norm`]: number };
48-
49-
type Rope<TArchitecture extends LLMArchitecture> = { [K in `${TArchitecture}.rope.dimension_count`]: number } & {
50-
[K in `${TArchitecture}.rope.freq_base`]: number;
51-
} & { [K in `${TArchitecture}.rope.scale`]: number } & { [K in `${TArchitecture}.rope.scale_linear`]: number };
52-
53-
type MOE<TArchitecture extends LLMArchitecture> = { [K in `${TArchitecture}.expert_count`]: number } & {
54-
[K in `${TArchitecture}.expert_used_count`]: number;
55-
};
56-
57-
export type TransformerLLMArchitecture = LLMArchitecture; // type alias
58-
export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture> &
59-
MOE<TArchitecture> &
60-
Attention<TArchitecture> &
61-
Rope<TArchitecture>;
62-
63-
export enum TransformerLLMPoolingType {
64-
UNSPECIFIED = -1,
65-
NONE = 0,
66-
MEAN = 1,
67-
CLS = 2,
68-
}
69-
7078
export type ArchLlama = TransformerLLMBase<"llama"> & {
7179
"llama.attention.layer_norm_rms_epsilon": number;
7280
};

packages/gguf/src/types.ts

Lines changed: 24 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -50,21 +50,28 @@ export enum GGUFValueType {
5050
const ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"] as const;
5151
export type Architecture = (typeof ARCHITECTURES)[number];
5252

53-
interface General {
54-
"general.architecture": Architecture;
53+
export interface GGUFGeneralInfo<TArchitecture extends Architecture> {
54+
"general.architecture": TArchitecture;
5555
"general.name": string;
56-
"general.file_type": number;
57-
"general.quantization_version": number;
56+
"general.file_type"?: number;
57+
"general.quantization_version"?: number;
5858
}
5959

6060
export type ModelBase<
6161
TArchitecture extends
6262
| Architecture
6363
| `encoder.${Extract<Architecture, "whisper">}`
6464
| `decoder.${Extract<Architecture, "whisper">}`,
65-
> = { [K in `${TArchitecture}.layer_count`]: number } & { [K in `${TArchitecture}.feed_forward_length`]: number } & {
66-
[K in `${TArchitecture}.context_length`]: number;
67-
} & { [K in `${TArchitecture}.embedding_length`]: number } & { [K in `${TArchitecture}.block_count`]: number };
65+
> = Record<
66+
| `${TArchitecture}.layer_count`
67+
| `${TArchitecture}.feed_forward_length`
68+
| `${TArchitecture}.context_length`
69+
| `${TArchitecture}.embedding_length`
70+
| `${TArchitecture}.block_count`,
71+
number
72+
>;
73+
74+
/// Tokenizer
6875

6976
type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert";
7077
interface Tokenizer {
@@ -77,18 +84,22 @@ interface Tokenizer {
7784
"tokenizer.ggml.add_bos_token": boolean;
7885
"tokenizer.chat_template": string;
7986
}
87+
type NoTokenizer = Record<keyof Tokenizer, undefined>;
88+
89+
/// Models outside of llama.cpp: "rwkv" and "whisper"
90+
91+
export type RWKV = GGUFGeneralInfo<"rwkv"> & ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
92+
93+
export type Whisper = GGUFGeneralInfo<"whisper"> & ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
8094

81-
export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
82-
export type LLM = TransformerLLM | RWKV;
83-
export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
84-
export type Model = (LLM | Whisper) & Partial<Tokenizer>;
95+
/// Types for parse output
8596

8697
export type GGUFMetadata = {
8798
version: Version;
8899
tensor_count: bigint;
89100
kv_count: bigint;
90-
} & Partial<General> &
91-
Partial<Model> &
101+
} & (Whisper | RWKV | TransformerLLM) &
102+
(NoTokenizer | Tokenizer) &
92103
Record<string, MetadataValue>;
93104

94105
export interface GGUFTensorInfo {

0 commit comments

Comments
 (0)