refactor typing + usage

ngxson · ngxson · commit e0610178cd93 · 2024-05-04T22:52:44.000+02:00
diff --git a/packages/gguf/scripts/generate-llm.ts b/packages/gguf/scripts/generate-llm.ts
@@ -8,28 +8,39 @@ import { writeFileSync } from "node:fs";
 const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/llama.cpp";
 const DEST_FILE_PATH = "./src/transformer-llm.ts";
 const DEST_COMMON_SOURCE = `
-type Attention<TArchitecture extends string> =
-	& { [K in \`\${TArchitecture}.attention.head_count\`]: number }
-	& { [K in \`\${TArchitecture}.attention.head_count_kv\`]: number }
-	& { [K in \`\${TArchitecture}.attention.layer_norm_epsilon\`]: number }
-	& { [K in \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`]: number }
-	& { [K in \`\${TArchitecture}.attention.alibi_bias_max\`]: number }
-	& { [K in \`\${TArchitecture}.attention.clip_kqv\`]: number }
-	& { [K in \`\${TArchitecture}.attention.use_norm\`]: number };
-
-type Rope<TArchitecture extends LLMArchitecture> =
-	& { [K in \`\${TArchitecture}.rope.dimension_count\`]: number }
-	& { [K in \`\${TArchitecture}.rope.freq_base\`]: number }
-	& { [K in \`\${TArchitecture}.rope.scale\`]: number }
-	& { [K in \`\${TArchitecture}.rope.scale_linear\`]: number };
-
-type MOE<TArchitecture extends LLMArchitecture> =
-	& { [K in \`\${TArchitecture}.expert_count\`]: number }
-	& { [K in \`\${TArchitecture}.expert_used_count\`]: number };
+/** This file is auto-generated by generate-llm.ts */
+
+import type { ModelBase, GGUFGeneralInfo } from "./types";
+
+type Attention<TArchitecture extends string> = Record<
+	| \`\${TArchitecture}.attention.head_count\`
+	| \`\${TArchitecture}.attention.head_count_kv\`
+	| \`\${TArchitecture}.attention.layer_norm_epsilon\`
+	| \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`
+	| \`\${TArchitecture}.attention.alibi_bias_max\`
+	| \`\${TArchitecture}.attention.clip_kqv\`
+	| \`\${TArchitecture}.attention.use_norm\`,
+	number
+>;
+
+type Rope<TArchitecture extends LLMArchitecture> = Record<
+	| \`\${TArchitecture}.rope.dimension_count\`
+	| \`\${TArchitecture}.rope.freq_base\`
+	| \`\${TArchitecture}.rope.scale\`
+	| \`\${TArchitecture}.rope.scale_linear\`,
+	number
+>;
+
+type MOE<TArchitecture extends LLMArchitecture> = Record<
+	| \`\${TArchitecture}.expert_count\`
+	| \`\${TArchitecture}.expert_used_count\`,
+	number
+>;
 
 export type TransformerLLMArchitecture = LLMArchitecture; // type alias
-export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture>
-	& MOE<TArchitecture>
+export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture>
+	& ModelBase<TArchitecture>
+	& Partial<MOE<TArchitecture>>
 	& Attention<TArchitecture>
 	& Rope<TArchitecture>;
 
@@ -163,15 +174,11 @@ async function main() {
 	/////////////////////////////////////
 	// write result to file
 	const content = [
-		"/** This file is auto-generated by generate-llm.ts */",
-		"",
-		'import type { ModelBase } from "./types";',
-		"",
+		DEST_COMMON_SOURCE,
 		"export const LLM_ARCHITECTURES = [",
 		...archList.map((a) => `\t${JSON.stringify(a.name)},`),
 		"] as const;",
 		"type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];",
-		DEST_COMMON_SOURCE,
 		...archList.map((a) => {
 			let code = `export type ${a.tsName} = TransformerLLMBase<${JSON.stringify(a.name)}>`;
 			if (a.hparams.length) {
diff --git a/packages/gguf/src/transformer-llm.ts b/packages/gguf/src/transformer-llm.ts
@@ -1,6 +1,44 @@
 /** This file is auto-generated by generate-llm.ts */
 
-import type { ModelBase } from "./types";
+import type { ModelBase, GGUFGeneralInfo } from "./types";
+
+type Attention<TArchitecture extends string> = Record<
+	| `${TArchitecture}.attention.head_count`
+	| `${TArchitecture}.attention.head_count_kv`
+	| `${TArchitecture}.attention.layer_norm_epsilon`
+	| `${TArchitecture}.attention.layer_norm_rms_epsilon`
+	| `${TArchitecture}.attention.alibi_bias_max`
+	| `${TArchitecture}.attention.clip_kqv`
+	| `${TArchitecture}.attention.use_norm`,
+	number
+>;
+
+type Rope<TArchitecture extends LLMArchitecture> = Record<
+	| `${TArchitecture}.rope.dimension_count`
+	| `${TArchitecture}.rope.freq_base`
+	| `${TArchitecture}.rope.scale`
+	| `${TArchitecture}.rope.scale_linear`,
+	number
+>;
+
+type MOE<TArchitecture extends LLMArchitecture> = Record<
+	`${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`,
+	number
+>;
+
+export type TransformerLLMArchitecture = LLMArchitecture; // type alias
+export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture> &
+	ModelBase<TArchitecture> &
+	Partial<MOE<TArchitecture>> &
+	Attention<TArchitecture> &
+	Rope<TArchitecture>;
+
+export enum TransformerLLMPoolingType {
+	UNSPECIFIED = -1,
+	NONE = 0,
+	MEAN = 1,
+	CLS = 2,
+}
 
 export const LLM_ARCHITECTURES = [
 	"llama",
@@ -37,36 +75,6 @@ export const LLM_ARCHITECTURES = [
 	"olmo",
 ] as const;
 type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
-
-type Attention<TArchitecture extends string> = { [K in `${TArchitecture}.attention.head_count`]: number } & {
-	[K in `${TArchitecture}.attention.head_count_kv`]: number;
-} & { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number } & {
-	[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number;
-} & { [K in `${TArchitecture}.attention.alibi_bias_max`]: number } & {
-	[K in `${TArchitecture}.attention.clip_kqv`]: number;
-} & { [K in `${TArchitecture}.attention.use_norm`]: number };
-
-type Rope<TArchitecture extends LLMArchitecture> = { [K in `${TArchitecture}.rope.dimension_count`]: number } & {
-	[K in `${TArchitecture}.rope.freq_base`]: number;
-} & { [K in `${TArchitecture}.rope.scale`]: number } & { [K in `${TArchitecture}.rope.scale_linear`]: number };
-
-type MOE<TArchitecture extends LLMArchitecture> = { [K in `${TArchitecture}.expert_count`]: number } & {
-	[K in `${TArchitecture}.expert_used_count`]: number;
-};
-
-export type TransformerLLMArchitecture = LLMArchitecture; // type alias
-export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture> &
-	MOE<TArchitecture> &
-	Attention<TArchitecture> &
-	Rope<TArchitecture>;
-
-export enum TransformerLLMPoolingType {
-	UNSPECIFIED = -1,
-	NONE = 0,
-	MEAN = 1,
-	CLS = 2,
-}
-
 export type ArchLlama = TransformerLLMBase<"llama"> & {
 	"llama.attention.layer_norm_rms_epsilon": number;
 };
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
@@ -50,21 +50,28 @@ export enum GGUFValueType {
 const ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"] as const;
 export type Architecture = (typeof ARCHITECTURES)[number];
 
-interface General {
-	"general.architecture": Architecture;
+export interface GGUFGeneralInfo<TArchitecture extends Architecture> {
+	"general.architecture": TArchitecture;
 	"general.name": string;
-	"general.file_type": number;
-	"general.quantization_version": number;
+	"general.file_type"?: number;
+	"general.quantization_version"?: number;
 }
 
 export type ModelBase<
 	TArchitecture extends
 		| Architecture
 		| `encoder.${Extract<Architecture, "whisper">}`
 		| `decoder.${Extract<Architecture, "whisper">}`,
-> = { [K in `${TArchitecture}.layer_count`]: number } & { [K in `${TArchitecture}.feed_forward_length`]: number } & {
-	[K in `${TArchitecture}.context_length`]: number;
-} & { [K in `${TArchitecture}.embedding_length`]: number } & { [K in `${TArchitecture}.block_count`]: number };
+> = Record<
+	| `${TArchitecture}.layer_count`
+	| `${TArchitecture}.feed_forward_length`
+	| `${TArchitecture}.context_length`
+	| `${TArchitecture}.embedding_length`
+	| `${TArchitecture}.block_count`,
+	number
+>;
+
+/// Tokenizer
 
 type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert";
 interface Tokenizer {
@@ -77,18 +84,22 @@ interface Tokenizer {
 	"tokenizer.ggml.add_bos_token": boolean;
 	"tokenizer.chat_template": string;
 }
+type NoTokenizer = Record<keyof Tokenizer, undefined>;
+
+/// Models outside of llama.cpp: "rwkv" and "whisper"
+
+export type RWKV = GGUFGeneralInfo<"rwkv"> & ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
+
+export type Whisper = GGUFGeneralInfo<"whisper"> & ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
 
-export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
-export type LLM = TransformerLLM | RWKV;
-export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
-export type Model = (LLM | Whisper) & Partial<Tokenizer>;
+/// Types for parse output
 
 export type GGUFMetadata = {
 	version: Version;
 	tensor_count: bigint;
 	kv_count: bigint;
-} & Partial<General> &
-	Partial<Model> &
+} & (Whisper | RWKV | TransformerLLM) &
+	(NoTokenizer | Tokenizer) &
 	Record<string, MetadataValue>;
 
 export interface GGUFTensorInfo {