huggingface · mishig25 · May 2, 2024 · Apr 27, 2024 · Apr 27, 2024 · Apr 29, 2024
@@ -32,6 +32,7 @@
 		"format:check": "prettier --check .",
 		"prepublishOnly": "pnpm run build",
 		"build": "tsup src/index.ts --format cjs,esm --clean --dts",
+		"build:llm": "tsx scripts/generate-llm.ts && pnpm run format",
 		"test": "vitest run",
 		"check": "tsc"
 	},

@@ -0,0 +1,202 @@
+/**
+ * Script for generating src/transformer-llm.ts
+ * The source data is extracted from the llama.cpp source file (llama.cpp, master branch)
+ */
+
+import { writeFileSync } from "node:fs";
+
+/** URL of the raw llama.cpp C++ source that main() downloads and parses. */
+const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/llama.cpp";
+/** Path of the generated TypeScript file written by main(). */
+const DEST_FILE_PATH = "./src/transformer-llm.ts";
+/**
+ * Hand-written TypeScript that main() emits verbatim into the generated file,
+ * right after the LLM_ARCHITECTURES / LLMArchitecture declarations and before
+ * the per-architecture types. It relies on `LLMArchitecture` and `ModelBase`
+ * being declared/imported by the lines main() writes ahead of it.
+ *
+ * NOTE(review): `Attention` constrains its parameter to `string` while `Rope`
+ * and `MOE` use `LLMArchitecture` — confirm whether the difference is intended.
+ */
+const DEST_COMMON_SOURCE = `
+type Attention<TArchitecture extends string> =
+	& { [K in \`\${TArchitecture}.attention.head_count\`]: number }
+	& { [K in \`\${TArchitecture}.attention.head_count_kv\`]: number }
+	& { [K in \`\${TArchitecture}.attention.layer_norm_epsilon\`]: number }
+	& { [K in \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`]: number }
+	& { [K in \`\${TArchitecture}.attention.alibi_bias_max\`]: number }
+	& { [K in \`\${TArchitecture}.attention.clip_kqv\`]: number }
+	& { [K in \`\${TArchitecture}.attention.use_norm\`]: number };
+
+type Rope<TArchitecture extends LLMArchitecture> =
+	& { [K in \`\${TArchitecture}.rope.dimension_count\`]: number }
+	& { [K in \`\${TArchitecture}.rope.freq_base\`]: number }
+	& { [K in \`\${TArchitecture}.rope.scale\`]: number }
+	& { [K in \`\${TArchitecture}.rope.scale_linear\`]: number };
+
+type MOE<TArchitecture extends LLMArchitecture> =
+	& { [K in \`\${TArchitecture}.expert_count\`]: number }
+	& { [K in \`\${TArchitecture}.expert_used_count\`]: number };
+
+export type TransformerLLMArchitecture = LLMArchitecture; // type alias
+export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture>
+	& MOE<TArchitecture>
+	& Attention<TArchitecture>
+	& Rope<TArchitecture>;
+
+export enum TransformerLLMPoolingType {
+	UNSPECIFIED = -1,
+	NONE = 0,
+	MEAN = 1,
+	CLS = 2,
+};
+`;
+
+/**
+ * Maps a llama.cpp `LLM_KV_*` constant name to the TypeScript type that is
+ * written for that key in the generated per-architecture types.
+ *
+ * Typed as a string-indexed record because main() looks entries up with
+ * constant names scraped from the C++ source (plain `string`s), which an
+ * un-annotated object literal does not allow under `strict`.
+ */
+const KV_TYPE: Record<string, string> = {
+	LLM_KV_ATTENTION_LAYERNORM_RMS_EPS: "number",
+	LLM_KV_ATTENTION_LAYERNORM_EPS: "number",
+	LLM_KV_ATTENTION_CAUSAL: "boolean",
+	LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT: "number",
+	LLM_KV_POOLING_TYPE: "TransformerLLMPoolingType",
+	LLM_KV_ATTENTION_CLAMP_KQV: "number",
+	LLM_KV_ATTENTION_MAX_ALIBI_BIAS: "number",
+	LLM_KV_SSM_CONV_KERNEL: "number",
+	LLM_KV_SSM_INNER_SIZE: "number",
+	LLM_KV_SSM_STATE_SIZE: "number",
+	LLM_KV_SSM_TIME_STEP_RANK: "number",
+	LLM_KV_LOGIT_SCALE: "number",
+};
+
+/**
+ * Everything main() collects about one architecture while scanning the
+ * llama.cpp source.
+ */
+interface Arch {
+	cppConst: string; // for example: "LLM_ARCH_LLAMA"
+	name: string; // for example: "llama"
+	tsName: string; // for example: "ArchLlama"
+	tensorNames: string[]; // for example: "token_embd"
+	// LLM_KV_* constant names found under this architecture's `case` label
+	// inside llm_load_hparams()
+	hparams: string[];
+}
+
+/**
+ * Returns the lines of the initializer of the C++ table `tableName`, i.e. the
+ * text between `tableName = ` and the first `;`, split on newlines.
+ *
+ * @throws Error when the table cannot be found in `cppSource`.
+ */
+function extractTableLines(cppSource: string, tableName: string): string[] {
+	const body = cppSource.match(new RegExp(`${tableName} = (?<names>[^;]+)`))?.groups?.names;
+	if (!body) {
+		throw new Error(`${tableName} is empty`);
+	}
+	return body.split("\n");
+}
+
+/**
+ * Downloads the llama.cpp source, extracts the architecture list, the KV key
+ * names, the tensor names and the per-architecture hyper params, then writes
+ * the generated TypeScript to DEST_FILE_PATH.
+ *
+ * @throws Error when the download fails, when one of the expected C++ tables
+ *   or llm_load_hparams() cannot be found, or when a hyper param has no entry
+ *   in LLM_KV_NAMES or KV_TYPE.
+ */
+async function main() {
+	const res = await fetch(SOURCE_CPP_URL);
+	// Without this check an HTTP error page would be parsed as C++ and surface
+	// later as a misleading "LLM_ARCH_NAMES is empty".
+	if (!res.ok) {
+		throw new Error(`Failed to fetch ${SOURCE_CPP_URL}: ${res.status} ${res.statusText}`);
+	}
+	const cppSource = await res.text();
+
+	/////////////////////////////////////
+	// extract list of all architectures
+	const RE_ARCH_NAME = /LLM_ARCH_[A-Z0-9_]+/;
+	const archList: Arch[] = [];
+	for (const line of extractTableLines(cppSource, "LLM_ARCH_NAMES")) {
+		const groups = line.match(/(?<cppConst>LLM_ARCH_[A-Z0-9_]+),\s+"(?<name>.+?)"/)?.groups;
+		// skip lines without an entry, and the placeholder "unknown" architecture
+		if (!groups?.cppConst || !groups.name || groups.name.includes("unknown")) {
+			continue;
+		}
+		archList.push({
+			cppConst: groups.cppConst,
+			name: groups.name,
+			tsName: snakeToPascal(groups.cppConst.replace("LLM_", "")),
+			tensorNames: [],
+			hparams: [],
+		});
+	}
+
+	/////////////////////////////////////
+	// extract map constant name to kv name
+	// for example: LLM_KV_ATTENTION_LAYERNORM_RMS_EPS ==> "%s.attention.layer_norm_rms_epsilon"
+	const constToKVName = new Map<string, string>();
+	for (const line of extractTableLines(cppSource, "LLM_KV_NAMES")) {
+		const groups = line.match(/(?<cppConst>LLM_KV_[A-Z0-9_]+)[,\s]+"(?<name>.+?)"/)?.groups;
+		if (groups?.cppConst && groups.name) {
+			constToKVName.set(groups.cppConst, groups.name);
+		}
+	}
+
+	/////////////////////////////////////
+	// extract list of tensor names based on architecture
+	// TODO: unused for now
+	let currArch: Arch | undefined;
+	for (const line of extractTableLines(cppSource, "LLM_TENSOR_NAMES")) {
+		// a line with LLM_ARCH_* starts the tensor list of that architecture
+		const cppConst = line.match(RE_ARCH_NAME)?.[0];
+		if (cppConst) {
+			currArch = archList.find((a) => a.cppConst === cppConst);
+			continue;
+		}
+		// a line with LLM_TENSOR_* belongs to the current architecture
+		const tensorName = line.match(/LLM_TENSOR_[A-Z0-9_]+[,\s]+"(?<name>.+?)"/)?.groups?.name;
+		if (tensorName) {
+			currArch?.tensorNames.push(tensorName);
+		}
+	}
+
+	/////////////////////////////////////
+	// extract list of hyper params based on architecture
+	const RE_CASE = new RegExp(`case (${RE_ARCH_NAME.source})`);
+	let insideLoadHParamsFn = false;
+	currArch = undefined;
+	for (const line of cppSource.split("\n")) {
+		// check if current line is function llm_load_hparams()
+		if (line.startsWith("static void llm_load_hparams")) {
+			insideLoadHParamsFn = true;
+		}
+		if (!insideLoadHParamsFn) {
+			continue;
+		}
+		// check if current line has `case LLM_ARCH_*`
+		const cppConst = line.match(RE_CASE)?.[1];
+		if (cppConst) {
+			currArch = archList.find((a) => a.cppConst === cppConst);
+			continue;
+		}
+		// check if current line has get_key(...); a key read more than once under
+		// the same case must only be recorded once, otherwise the generated type
+		// would declare the same property twice
+		const keyConst = line.match(/LLM_KV_[A-Z0-9_]+/)?.[0];
+		if (keyConst && currArch && !currArch.hparams.includes(keyConst)) {
+			currArch.hparams.push(keyConst);
+		}
+		// check if current line is end-of-function
+		if (line === "}") {
+			break;
+		}
+	}
+	if (!insideLoadHParamsFn) {
+		throw new Error("llm_load_hparams() is not found");
+	}
+
+	/////////////////////////////////////
+	// write result to file
+	const kvTypes: Record<string, string | undefined> = KV_TYPE;
+	const archTypes = archList.map((a) => {
+		const header = `export type ${a.tsName} = TransformerLLMBase<${JSON.stringify(a.name)}>`;
+		if (!a.hparams.length) {
+			return `${header};`;
+		}
+		const fields = a.hparams.map((k) => {
+			const kvName = constToKVName.get(k);
+			if (kvName === undefined) {
+				throw new Error(`Cannot find the name of ${k} in LLM_KV_NAMES`);
+			}
+			const tsType = kvTypes[k];
+			if (tsType === undefined) {
+				throw new Error(`Cannot find the type of ${k}, please add it to KV_TYPE`);
+			}
+			return `\t${JSON.stringify(kvName.replace("%s", a.name))}: ${tsType},`;
+		});
+		return [`${header} & {`, ...fields, "};"].join("\n");
+	});
+
+	const content = [
+		"/** This file is auto-generated by generate-llm.ts */",
+		"",
+		'import type { ModelBase } from "./types";',
+		"",
+		"export const LLM_ARCHITECTURES = [",
+		...archList.map((a) => `\t${JSON.stringify(a.name)},`),
+		"] as const;",
+		"type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];",
+		DEST_COMMON_SOURCE,
+		...archTypes,
+		"",
+		`export type TransformerLLM = ${archList.map((a) => a.tsName).join(" | ")};`,
+	].join("\n");
+
+	writeFileSync(DEST_FILE_PATH, content);
+}
+
+/**
+ * Converts an underscore-separated identifier to PascalCase: each
+ * `_`-separated segment gets an upper-case first character and a lower-case
+ * remainder, and the segments are concatenated without a separator.
+ * For example: "ARCH_LLAMA" ==> "ArchLlama"
+ */
+function snakeToPascal(str: string) {
+	let pascal = "";
+	for (const segment of str.split("_")) {
+		const head = segment.slice(0, 1).toUpperCase();
+		const tail = segment.slice(1).toLowerCase();
+		pascal += head + tail;
+	}
+	return pascal;
+}
+
+// Handle the returned promise explicitly: log the failure and exit with a
+// non-zero code instead of leaving the rejection unhandled.
+main().catch((err: unknown) => {
+	console.error(err);
+	process.exitCode = 1;
+});