Skip to content

Commit 29b85e6

Browse files
committed
wip: 1.625 bpw ternary packing scheme
1 parent e112b61 commit 29b85e6

File tree

11 files changed

+593
-4
lines changed

11 files changed

+593
-4
lines changed

convert-hf-to-gguf.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,12 +294,27 @@ def write_tensors(self):
294294
))
295295

296296
if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
297-
if self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
297+
if self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3 and not any(
298+
self.match_model_tensor_name(new_name, key, None)
299+
for key in [
300+
gguf.MODEL_TENSOR.TOKEN_EMBD,
301+
gguf.MODEL_TENSOR.OUTPUT,
302+
]
303+
):
304+
data = gguf.quantize_q1_3(data)
305+
assert data.dtype == np.uint8
306+
data_qtype = gguf.GGMLQuantizationType.Q1_3
307+
308+
elif self.ftype == gguf.LlamaFileType.MOSTLY_BF16:
298309
data = gguf.quantize_bf16(data)
299310
assert data.dtype == np.int16
300311
data_qtype = gguf.GGMLQuantizationType.BF16
301312

302-
elif self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0 and gguf.can_quantize_to_q8_0(data):
313+
elif (
314+
self.ftype == gguf.LlamaFileType.MOSTLY_Q8_0
315+
or self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
316+
and gguf.can_quantize_to_q8_0(data)
317+
):
303318
data = gguf.quantize_q8_0(data)
304319
assert data.dtype == np.uint8
305320
data_qtype = gguf.GGMLQuantizationType.Q8_0
@@ -1408,6 +1423,12 @@ def write_tensors(self):
14081423
class BitnetModel(Model):
14091424
model_arch = gguf.MODEL_ARCH.BITNET
14101425

1426+
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
1427+
# When the requested file type is GUESSED, substitute MOSTLY_Q1_3 before
# delegating to the base constructor; every other ftype is passed through unchanged.
# NOTE(review): presumably GUESSED means "no explicit output type chosen" - confirm.
if ftype == gguf.LlamaFileType.GUESSED:
1428+
ftype = gguf.LlamaFileType.MOSTLY_Q1_3
1429+
1430+
super().__init__(dir_model, ftype, fname_out, is_big_endian, use_temp_file, eager, model_name)
1431+
14111432
def set_vocab(self):
14121433
self._set_vocab_sentencepiece()
14131434

examples/quantize/quantize.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
2626
{ "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", },
2727
{ "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
2828
{ "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
29+
{ "Q1_3", LLAMA_FTYPE_MOSTLY_Q1_3, " 1.63 bpw for BitNet 1.58b", },
30+
{ "Q2_2", LLAMA_FTYPE_MOSTLY_Q2_2, " 2.00 bpw for BitNet 1.58b", },
2931
{ "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B", },
3032
{ "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B", },
3133
{ "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", },

ggml-common.h

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,20 @@ typedef sycl::half2 ggml_half2;
137137

138138
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
139139

140+
// 1.625 bpw for BitNet 1.58b models
141+
#define QK1_3 64
142+
// One block holds QK1_3 (64) elements in 13 bytes: 12 bytes in q plus 1 byte in qs,
// i.e. 13*8/64 = 1.625 bits per weight. There is no per-block scale field.
typedef struct {
143+
uint8_t q[(QK1_3 - 4*QK1_3/64)/5]; // 60 of the 64 elements, 5 elements per byte (3^5 = 243 < 256) -> 12 bytes
144+
uint8_t qs[QK1_3/64]; // the remaining 4 elements, 4 elements per byte -> 1 byte
145+
} block_q1_3;
146+
static_assert(sizeof(block_q1_3) == (QK1_3 - 4*QK1_3/64)/5 + QK1_3/64, "wrong q1_3 block size/padding");
147+
148+
#define QK2_2 32
149+
// One block holds QK2_2 (32) elements in QK2_2/4 = 8 bytes, i.e. 2 bits per weight.
// There is no per-block scale field.
typedef struct {
149+
uint8_t qs[QK2_2 / 4]; // 4 elements per byte (2 bits each)
150+
} block_q2_2;
152+
static_assert(sizeof(block_q2_2) == QK2_2 / 4, "wrong q2_2 block size/padding");
153+
140154
#define QK4_0 32
141155
typedef struct {
142156
ggml_half d; // delta
@@ -333,6 +347,7 @@ typedef struct {
333347
} block_iq3_s;
334348
static_assert(sizeof(block_iq3_s) == sizeof(ggml_half) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
335349

350+
// 1.5625 bpw
336351
typedef struct {
337352
ggml_half d;
338353
uint8_t qs[QK_K/8];
@@ -1022,6 +1037,108 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
10221037
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
10231038
GGML_TABLE_END()
10241039

1040+
GGML_TABLE_BEGIN(uint32_t, q22_grid, 256)
1041+
0x00000000, 0x01000000, 0x00000000, 0xff000000,
1042+
0x00010000, 0x01010000, 0x00010000, 0xff010000,
1043+
0x00000000, 0x01000000, 0x00000000, 0xff000000,
1044+
0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000,
1045+
0x00000100, 0x01000100, 0x00000100, 0xff000100,
1046+
0x00010100, 0x01010100, 0x00010100, 0xff010100,
1047+
0x00000100, 0x01000100, 0x00000100, 0xff000100,
1048+
0x00ff0100, 0x01ff0100, 0x00ff0100, 0xffff0100,
1049+
0x00000000, 0x01000000, 0x00000000, 0xff000000,
1050+
0x00010000, 0x01010000, 0x00010000, 0xff010000,
1051+
0x00000000, 0x01000000, 0x00000000, 0xff000000,
1052+
0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000,
1053+
0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00,
1054+
0x0001ff00, 0x0101ff00, 0x0001ff00, 0xff01ff00,
1055+
0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00,
1056+
0x00ffff00, 0x01ffff00, 0x00ffff00, 0xffffff00,
1057+
0x00000001, 0x01000001, 0x00000001, 0xff000001,
1058+
0x00010001, 0x01010001, 0x00010001, 0xff010001,
1059+
0x00000001, 0x01000001, 0x00000001, 0xff000001,
1060+
0x00ff0001, 0x01ff0001, 0x00ff0001, 0xffff0001,
1061+
0x00000101, 0x01000101, 0x00000101, 0xff000101,
1062+
0x00010101, 0x01010101, 0x00010101, 0xff010101,
1063+
0x00000101, 0x01000101, 0x00000101, 0xff000101,
1064+
0x00ff0101, 0x01ff0101, 0x00ff0101, 0xffff0101,
1065+
0x00000001, 0x01000001, 0x00000001, 0xff000001,
1066+
0x00010001, 0x01010001, 0x00010001, 0xff010001,
1067+
0x00000001, 0x01000001, 0x00000001, 0xff000001,
1068+
0x00ff0001, 0x01ff0001, 0x00ff0001, 0xffff0001,
1069+
0x0000ff01, 0x0100ff01, 0x0000ff01, 0xff00ff01,
1070+
0x0001ff01, 0x0101ff01, 0x0001ff01, 0xff01ff01,
1071+
0x0000ff01, 0x0100ff01, 0x0000ff01, 0xff00ff01,
1072+
0x00ffff01, 0x01ffff01, 0x00ffff01, 0xffffff01,
1073+
0x00000000, 0x01000000, 0x00000000, 0xff000000,
1074+
0x00010000, 0x01010000, 0x00010000, 0xff010000,
1075+
0x00000000, 0x01000000, 0x00000000, 0xff000000,
1076+
0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000,
1077+
0x00000100, 0x01000100, 0x00000100, 0xff000100,
1078+
0x00010100, 0x01010100, 0x00010100, 0xff010100,
1079+
0x00000100, 0x01000100, 0x00000100, 0xff000100,
1080+
0x00ff0100, 0x01ff0100, 0x00ff0100, 0xffff0100,
1081+
0x00000000, 0x01000000, 0x00000000, 0xff000000,
1082+
0x00010000, 0x01010000, 0x00010000, 0xff010000,
1083+
0x00000000, 0x01000000, 0x00000000, 0xff000000,
1084+
0x00ff0000, 0x01ff0000, 0x00ff0000, 0xffff0000,
1085+
0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00,
1086+
0x0001ff00, 0x0101ff00, 0x0001ff00, 0xff01ff00,
1087+
0x0000ff00, 0x0100ff00, 0x0000ff00, 0xff00ff00,
1088+
0x00ffff00, 0x01ffff00, 0x00ffff00, 0xffffff00,
1089+
0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff,
1090+
0x000100ff, 0x010100ff, 0x000100ff, 0xff0100ff,
1091+
0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff,
1092+
0x00ff00ff, 0x01ff00ff, 0x00ff00ff, 0xffff00ff,
1093+
0x000001ff, 0x010001ff, 0x000001ff, 0xff0001ff,
1094+
0x000101ff, 0x010101ff, 0x000101ff, 0xff0101ff,
1095+
0x000001ff, 0x010001ff, 0x000001ff, 0xff0001ff,
1096+
0x00ff01ff, 0x01ff01ff, 0x00ff01ff, 0xffff01ff,
1097+
0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff,
1098+
0x000100ff, 0x010100ff, 0x000100ff, 0xff0100ff,
1099+
0x000000ff, 0x010000ff, 0x000000ff, 0xff0000ff,
1100+
0x00ff00ff, 0x01ff00ff, 0x00ff00ff, 0xffff00ff,
1101+
0x0000ffff, 0x0100ffff, 0x0000ffff, 0xff00ffff,
1102+
0x0001ffff, 0x0101ffff, 0x0001ffff, 0xff01ffff,
1103+
0x0000ffff, 0x0100ffff, 0x0000ffff, 0xff00ffff,
1104+
0x00ffffff, 0x01ffffff, 0x00ffffff, 0xffffffff,
1105+
GGML_TABLE_END()
1106+
1107+
GGML_TABLE_BEGIN(uint32_t, q1_3_grid, 256)
1108+
0xffffffff, 0xffffffff, 0xffffff00, 0xffffff01, 0xffff00ff, 0xffff0000, 0xffff0001, 0xffff01ff,
1109+
0xffff0100, 0xffff0101, 0xff00ffff, 0xff00ff00, 0xff00ff01, 0xff0000ff, 0xff000000, 0xff000001,
1110+
0xff0001ff, 0xff000100, 0xff000101, 0xff01ffff, 0xff01ffff, 0xff01ff00, 0xff01ff01, 0xff0100ff,
1111+
0xff010000, 0xff010001, 0xff0101ff, 0xff010100, 0xff010101, 0x00ffffff, 0x00ffff00, 0x00ffff01,
1112+
0x00ff00ff, 0x00ff0000, 0x00ff0001, 0x00ff01ff, 0x00ff0100, 0x00ff0101, 0x0000ffff, 0x0000ff00,
1113+
0x0000ff00, 0x0000ff01, 0x000000ff, 0x00000000, 0x00000001, 0x000001ff, 0x00000100, 0x00000101,
1114+
0x0001ffff, 0x0001ff00, 0x0001ff01, 0x000100ff, 0x00010000, 0x00010001, 0x000101ff, 0x00010100,
1115+
0x00010101, 0x01ffffff, 0x01ffff00, 0x01ffff01, 0x01ffff01, 0x01ff00ff, 0x01ff0000, 0x01ff0001,
1116+
0x01ff01ff, 0x01ff0100, 0x01ff0101, 0x0100ffff, 0x0100ff00, 0x0100ff01, 0x010000ff, 0x01000000,
1117+
0x01000001, 0x010001ff, 0x01000100, 0x01000101, 0x0101ffff, 0x0101ff00, 0x0101ff01, 0x0101ff01,
1118+
0x010100ff, 0x01010000, 0x01010001, 0x010101ff, 0x01010100, 0x01010101, 0xffffffff, 0xffffff00,
1119+
0xffffff01, 0xffff00ff, 0xffff0000, 0xffff0001, 0xffff01ff, 0xffff0100, 0xffff0101, 0xff00ffff,
1120+
0xff00ff00, 0xff00ff01, 0xff0000ff, 0xff0000ff, 0xff000000, 0xff000001, 0xff0001ff, 0xff000100,
1121+
0xff000101, 0xff01ffff, 0xff01ff00, 0xff01ff01, 0xff0100ff, 0xff010000, 0xff010001, 0xff0101ff,
1122+
0xff010100, 0xff010101, 0x00ffffff, 0x00ffff00, 0x00ffff01, 0x00ff00ff, 0x00ff0000, 0x00ff0000,
1123+
0x00ff0001, 0x00ff01ff, 0x00ff0100, 0x00ff0101, 0x0000ffff, 0x0000ff00, 0x0000ff01, 0x000000ff,
1124+
0x00000000, 0x00000001, 0x000001ff, 0x00000100, 0x00000101, 0x0001ffff, 0x0001ff00, 0x0001ff01,
1125+
0x000100ff, 0x00010000, 0x00010000, 0x00010001, 0x000101ff, 0x00010100, 0x00010101, 0x01ffffff,
1126+
0x01ffff00, 0x01ffff01, 0x01ff00ff, 0x01ff0000, 0x01ff0001, 0x01ff01ff, 0x01ff0100, 0x01ff0101,
1127+
0x0100ffff, 0x0100ff00, 0x0100ff01, 0x010000ff, 0x01000000, 0x01000001, 0x01000001, 0x010001ff,
1128+
0x01000100, 0x01000101, 0x0101ffff, 0x0101ff00, 0x0101ff01, 0x010100ff, 0x01010000, 0x01010001,
1129+
0x010101ff, 0x01010100, 0x01010101, 0xffffffff, 0xffffff00, 0xffffff01, 0xffff00ff, 0xffff0000,
1130+
0xffff0001, 0xffff01ff, 0xffff01ff, 0xffff0100, 0xffff0101, 0xff00ffff, 0xff00ff00, 0xff00ff01,
1131+
0xff0000ff, 0xff000000, 0xff000001, 0xff0001ff, 0xff000100, 0xff000101, 0xff01ffff, 0xff01ff00,
1132+
0xff01ff01, 0xff0100ff, 0xff010000, 0xff010001, 0xff0101ff, 0xff0101ff, 0xff010100, 0xff010101,
1133+
0x00ffffff, 0x00ffff00, 0x00ffff01, 0x00ff00ff, 0x00ff0000, 0x00ff0001, 0x00ff01ff, 0x00ff0100,
1134+
0x00ff0101, 0x0000ffff, 0x0000ff00, 0x0000ff01, 0x000000ff, 0x00000000, 0x00000001, 0x000001ff,
1135+
0x00000100, 0x00000100, 0x00000101, 0x0001ffff, 0x0001ff00, 0x0001ff01, 0x000100ff, 0x00010000,
1136+
0x00010001, 0x000101ff, 0x00010100, 0x00010101, 0x01ffffff, 0x01ffff00, 0x01ffff01, 0x01ff00ff,
1137+
0x01ff0000, 0x01ff0001, 0x01ff01ff, 0x01ff0100, 0x01ff0101, 0x01ff0101, 0x0100ffff, 0x0100ff00,
1138+
0x0100ff01, 0x010000ff, 0x01000000, 0x01000001, 0x010001ff, 0x01000100, 0x01000101, 0x0101ffff,
1139+
0x0101ff00, 0x0101ff01, 0x010100ff, 0x01010000, 0x01010001, 0x010101ff, 0x01010100, 0x01010101,
1140+
GGML_TABLE_END()
1141+
10251142
#define NGRID_IQ1S 2048
10261143
#define IQ1S_DELTA 0.125f
10271144
#define IQ1M_DELTA 0.125f

0 commit comments

Comments
 (0)