Commit aa73a4e

use_some_bits and use_most_bits

1 parent 7794c8f commit aa73a4e

File tree

1 file changed: +16, -1 lines changed

src/llama.cpp

Lines changed: 16 additions & 1 deletion
@@ -18071,11 +18071,26 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     auto difquant_three_eights_alt_tensors = [](int i_layer, int n_layers) -> bool {
         return i_layer <= n_layers/8 || (i_layer > 4*n_layers/8 && i_layer < 5*n_layers/8) || i_layer >= 7*n_layers/8;
     };
+
+    // original formula use_some_bits :
+    auto use_some_bits = [](int i_layer, int n_layers) -> bool {
+        return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%4 == 2;
+    };
+
     // original formula use_more_bits :
-    // return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%3 == 2;
+    auto use_more_bits = [](int i_layer, int n_layers) -> bool {
+        return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%3 == 2;
+    };
+
     // The intervals of 3 are replaced by a broad bump in the central layers.
     // In the case of a 32 layers model, layers 5-7 and layers 12-16 are always skipped.
     // In the case of a 40 layers model, layers 6-9 and layers 15-20 are always skipped.
+
+    // new formula use_most_bits :
+    auto use_most_bits = [](int i_layer, int n_layers) -> bool {
+        return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%2 == 1;
+    };
+
     // difquant_half_tensors replaces it and keeps the broad 50% bump to the upper quant. Ex : 16/32
     auto difquant_half_tensors = [](int i_layer, int n_layers) -> bool {
         // return i_layer <= n_layers/8 || (i_layer >= 2*n_layers/8 && i_layer < 3*n_layers/8) ||
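
The three predicates differ only in the modulus applied to the middle band of layers: the first and last n_layers/8 layers are always selected, and the band in between is sampled every 4th, 3rd, or 2nd layer. A minimal standalone sketch (a hypothetical test harness, not part of this commit) that re-declares the lambdas verbatim from the diff and prints which layers of a 32-layer model each one routes to the higher quant:

// Sketch: evaluate the three layer-selection predicates from the diff
// for an assumed 32-layer model and list the selected layer indices.
#include <cstdio>

int main() {
    auto use_some_bits = [](int i_layer, int n_layers) -> bool {
        return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%4 == 2;
    };
    auto use_more_bits = [](int i_layer, int n_layers) -> bool {
        return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%3 == 2;
    };
    auto use_most_bits = [](int i_layer, int n_layers) -> bool {
        return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%2 == 1;
    };

    // Captureless lambdas convert to plain function pointers.
    struct Pred { const char * name; bool (*fn)(int, int); };
    const Pred preds[] = {
        { "use_some_bits", use_some_bits },
        { "use_more_bits", use_more_bits },
        { "use_most_bits", use_most_bits },
    };

    const int n_layers = 32;
    for (const Pred & p : preds) {
        int n_selected = 0;
        printf("%-14s:", p.name);
        for (int i = 0; i < n_layers; ++i) {
            if (p.fn(i, n_layers)) {
                printf(" %d", i);
                ++n_selected;
            }
        }
        printf("  -> %d/%d layers\n", n_selected, n_layers);
    }
    return 0;
}

For 32 layers this selects 14, 16, and 20 layers respectively, so each step of the ladder widens the set of layers kept at the larger quant type while always pinning the first and last four.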
