
Commit 94e0aed

cleaner & shorter
1 parent: b1aeff5 · commit: 94e0aed

packages/gguf/src/quant_descriptions.ts

Lines changed: 19 additions & 13 deletions
@@ -3,18 +3,24 @@ import { GGMLQuantizationType } from "./types";
 export const QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, string> = {
 	[GGMLQuantizationType.F32]: "32-bit standard IEEE 754 single-precision floating-point number.", // src: https://en.wikipedia.org/wiki/Single-precision_floating-point_format
 	[GGMLQuantizationType.F16]: "16-bit standard IEEE 754 half-precision floating-point number.", // src: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
-	[GGMLQuantizationType.Q4_0]: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weights are obtained by w = q * block_scale_factor. Legacy quantization method (not used widely as of today)",
-	[GGMLQuantizationType.Q4_1]: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weights are obtained by w = q * block_scale_factor + block_minimum. Legacy quantization method (not used widely as of today)",
-	[GGMLQuantizationType.Q5_0]: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weights are obtained by w = q * block_scale_factor. Legacy quantization method (not used widely as of today)",
-	[GGMLQuantizationType.Q5_1]: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weights are obtained by w = q * block_scale_factor + block_minimum. Legacy quantization method (not used widely as of today)",
-	[GGMLQuantizationType.Q8_0]: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weights are obtained by w = q * block_scale_factor. Legacy quantization method (not used widely as of today)",
-	[GGMLQuantizationType.Q8_1]: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weights are obtained by w = q * block_scale_factor + block_minimum. Legacy quantization method (not used widely as of today)",
-	[GGMLQuantizationType.Q2_K]: `2-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weight. Block scales (d) & mins (m) are quantized with 4 bits, resulting in 2.5625 bits-per-weight. Weights are obtained by w = d * q + m.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q3_K]: `3-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Block scales (d) is quantized with 6 bits, resulting. 3.4375 bits-per-weight. Weights are obtained by w = d * q.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q4_K]: `4-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Block scales (d) & mins (m) are quantized with 6 bits, resulting. 4.5 bits-per-weight are obtained by w = d * q + m.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q5_K]: `5-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Block scales (d) & mins (m) are quantized with 6 bits, resulting in 5.5 bits-per-weight. Weights are obtained by w = d * q + m.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q6_K]: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Block scales (d) is quantized with 8 bits, resulting in 6.5625 bits-per-weight. Weights are obtained by w = d * q.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q8_K]: `8-bit quantization (q). Each block has 256 weights. Only used for quantizing intermediate results. All 2-6 bit dot products are implemented for this quantization type. Weights are obtained by w = q * block_scale_factor.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
+	[GGMLQuantizationType.Q4_0]:
+		"4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).",
+	[GGMLQuantizationType.Q4_1]:
+		"4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).",
+	[GGMLQuantizationType.Q5_0]:
+		"5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).",
+	[GGMLQuantizationType.Q5_1]:
+		"5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).",
+	[GGMLQuantizationType.Q8_0]:
+		"8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).",
+	[GGMLQuantizationType.Q8_1]:
+		"8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).",
+	[GGMLQuantizationType.Q2_K]: `2-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(4-bit) + block_min(4-bit), resulting in 2.5625 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
+	[GGMLQuantizationType.Q3_K]: `3-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(6-bit), resulting in 3.4375 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
+	[GGMLQuantizationType.Q4_K]: `4-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 4.5 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
+	[GGMLQuantizationType.Q5_K]: `5-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 5.5 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
+	[GGMLQuantizationType.Q6_K]: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(8-bit), resulting in 6.5625 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
+	[GGMLQuantizationType.Q8_K]: `8-bit quantization (q). Each block has 256 weights. Only used for quantizing intermediate results. All 2-6 bit dot products are implemented for this quantization type. Weight formula: w = q * block_scale.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
 	[GGMLQuantizationType.IQ2_XXS]: "", // todo: add description
 	[GGMLQuantizationType.IQ2_XS]: "", // todo: add description
 	[GGMLQuantizationType.IQ3_XXS]: "", // todo: add description
@@ -23,4 +29,4 @@ export const QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, string> = {
 	[GGMLQuantizationType.IQ3_S]: "", // todo: add description
 	[GGMLQuantizationType.IQ2_S]: "", // todo: add description
 	[GGMLQuantizationType.IQ4_XS]: "", // todo: add description
-};
+};
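
The descriptions above reduce to two dequantization formulas: "type-0" quants recover each weight as w = q * block_scale, and "type-1" quants as w = q * block_scale + block_minimum. Below is a minimal TypeScript sketch of both, plus the bits-per-weight arithmetic behind Q4_K's 4.5 figure. It is not part of this commit, and the Q4_K layout it assumes (4-bit quants; 8 blocks of 6-bit scales and mins; two fp16 super-block factors, d and dmin) is taken from the linked llama.cpp PR, not from this file.

// Sketch only. "Type-0" formula (Q4_0, Q5_0, Q8_0, Q8_K): w = q * block_scale
function dequantizeType0(quants: number[], blockScale: number): number[] {
	return quants.map((q) => q * blockScale);
}

// "Type-1" formula (Q4_1, Q5_1, Q8_1, and the K-quants with a min): w = q * block_scale + block_min
function dequantizeType1(quants: number[], blockScale: number, blockMin: number): number[] {
	return quants.map((q) => q * blockScale + blockMin);
}

console.log(dequantizeType0([2, 4], 0.25)); // -> [ 0.5, 1 ]
console.log(dequantizeType1([0, 1, 2, 3], 0.5, -1)); // -> [ -1, -0.5, 0, 0.5 ]

// Bits-per-weight check for Q4_K (super-block layout assumed from the linked PR):
// 256 weights at 4 bits each, 8 blocks x (6-bit scale + 6-bit min),
// plus two fp16 super-block factors (d, dmin).
const q4kBitsPerSuperBlock = 256 * 4 + 8 * (6 + 6) + 2 * 16; // 1024 + 96 + 32 = 1152 bits
console.log(q4kBitsPerSuperBlock / 256); // -> 4.5 bits-per-weight, matching the Q4_K description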
