import { GGMLQuantizationType } from "./types";

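/**
 * Human-readable descriptions for each GGML quantization type, keyed by
 * `GGMLQuantizationType`. `txt` is a short summary; `src_url`, where
 * present, points at the upstream discussion or documentation.
 */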
export const QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, { txt: string; src_url?: string }> = {
	[GGMLQuantizationType.F32]: {
		txt: "32-bit standard IEEE 754 single-precision floating-point number.",
		src_url: "https://en.wikipedia.org/wiki/Single-precision_floating-point_format",
	},
	[GGMLQuantizationType.F16]: {
		txt: "16-bit standard IEEE 754 half-precision floating-point number.",
		src_url: "https://en.wikipedia.org/wiki/Half-precision_floating-point_format",
	},
	[GGMLQuantizationType.Q4_0]: {
		txt: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used as of today).",
		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
	},
	[GGMLQuantizationType.Q4_1]: {
		txt: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used as of today).",
		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
	},
	[GGMLQuantizationType.Q5_0]: {
		txt: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used as of today).",
		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
	},
	[GGMLQuantizationType.Q5_1]: {
		txt: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used as of today).",
		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
	},
	[GGMLQuantizationType.Q8_0]: {
		txt: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used as of today).",
		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
	},
	[GGMLQuantizationType.Q8_1]: {
		txt: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used as of today).",
		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
	},
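	// Two reconstruction families recur in the K-quants below (llama.cpp
	// PR #1684 calls them "type-0" and "type-1"): type-0 reconstructs
	// w = q * block_scale, while type-1 adds a minimum,
	// w = q * block_scale + block_min.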
	[GGMLQuantizationType.Q2_K]: {
		txt: `2-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(4-bit) + block_min(4-bit), resulting in 2.5625 bits-per-weight.`,
		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
	},
	[GGMLQuantizationType.Q3_K]: {
		txt: `3-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(6-bit), resulting in 3.4375 bits-per-weight.`,
		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
	},
	[GGMLQuantizationType.Q4_K]: {
		txt: `4-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 4.5 bits-per-weight.`,
		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
	},
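	// Worked check of Q4_K's 4.5 bits-per-weight, assuming llama.cpp's
	// block_q4_K layout (two f16 super-block values d/dmin alongside the
	// 6-bit scales/mins): 256 * 4-bit quants (1024 bits) + 8 * 6-bit scales
	// and 8 * 6-bit mins (96 bits) + 2 * f16 (32 bits) = 1152 bits,
	// and 1152 / 256 weights = 4.5 bpw.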
	[GGMLQuantizationType.Q5_K]: {
		txt: `5-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 5.5 bits-per-weight.`,
		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
	},
	[GGMLQuantizationType.Q6_K]: {
		txt: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(8-bit), resulting in 6.5625 bits-per-weight.`,
		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
	},
	[GGMLQuantizationType.Q8_K]: {
		txt: `8-bit quantization (q). Each block has 256 weights. Only used for quantizing intermediate results. All 2-6 bit dot products are implemented for this quantization type. Weight formula: w = q * block_scale.`,
		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
	},
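	// Note: the "importance matrix" referenced by the IQ* entries below is,
	// in llama.cpp, typically produced from calibration data with the
	// `imatrix` tool and used to weight quantization error per element.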
	[GGMLQuantizationType.IQ2_XXS]: {
		txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.06 bits-per-weight.",
		src_url:
			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
	},
	[GGMLQuantizationType.IQ2_XS]: {
		txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.31 bits-per-weight.",
		src_url:
			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
	},
	[GGMLQuantizationType.IQ3_XXS]: {
		txt: "3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.06 bits-per-weight.",
		src_url:
			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
	},
	[GGMLQuantizationType.IQ1_S]: {
		txt: "1-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 1.56 bits-per-weight.",
		src_url:
			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
	},
	[GGMLQuantizationType.IQ4_NL]: {
		txt: "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix.",
	},
	[GGMLQuantizationType.IQ3_S]: {
		txt: "3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.44 bits-per-weight.",
		src_url:
			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
	},
	[GGMLQuantizationType.IQ2_S]: {
		txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.5 bits-per-weight.",
		src_url:
			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
	},
	[GGMLQuantizationType.IQ4_XS]: {
		txt: "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 4.25 bits-per-weight.",
		src_url:
			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
	},
};
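
// Illustrative sketch (not part of this module's public surface): how the
// legacy "type-0" formula `w = q * block_scale` maps to code for Q8_0. The
// block shape below is an assumption for illustration only: one scale
// (stored as f16 in the GGUF file) followed by 32 signed 8-bit quants.
interface Q8_0BlockSketch {
	scale: number; // block_scale, decoded from f16
	quants: Int8Array; // the 32 quantized weights q
}

function dequantizeQ8_0Sketch(block: Q8_0BlockSketch): Float32Array {
	const weights = new Float32Array(block.quants.length);
	for (let i = 0; i < block.quants.length; i++) {
		weights[i] = block.quants[i] * block.scale; // w = q * block_scale
	}
	return weights;
}

// Example: dequantizeQ8_0Sketch({ scale: 0.02, quants: new Int8Array(32) })
// returns a Float32Array of 32 zeros, since every quant is zero.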