@@ -1879,12 +1879,7 @@ void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, in
1879
1879
const int i_start = std::max (inp_size - nnew, ngram_size);
1880
1880
for (int i = i_start; i < inp_size; ++i) {
1881
1881
const int ngram_start = i - ngram_size;
1882
- llama_ngram ngram = inp[ngram_start];
1883
- for (int j = ngram_start+1 ; j < ngram_start + ngram_size; ++j) { // FIXME
1884
- const llama_ngram ngram_part = inp[j];
1885
- ngram <<= 16 ;
1886
- ngram |= ngram_part;
1887
- }
1882
+ llama_ngram ngram (&inp[ngram_start], ngram_size);
1888
1883
const llama_token token = inp[i];
1889
1884
1890
1885
llama_ngram_cache::iterator part_it = ngram_cache.find (ngram);
@@ -2019,11 +2014,9 @@ void llama_ngram_cache_draft(
2019
2014
llama_token drafted_token = -1 ;
2020
2015
2021
2016
const int ngram_start_static = inp_size-LLAMA_NGRAM_STATIC + draft.size ()-1 ;
2022
- llama_ngram ngram_static = get_token (inp, draft, ngram_start_static);
2023
- for (int j = ngram_start_static+1 ; j < ngram_start_static + LLAMA_NGRAM_STATIC; ++j) {
2024
- const llama_ngram token = get_token (inp, draft, j);
2025
- ngram_static <<= 16 ;
2026
- ngram_static |= token;
2017
+ llama_ngram ngram_static;
2018
+ for (int j = ngram_start_static; j < ngram_start_static + LLAMA_NGRAM_STATIC; ++j) {
2019
+ ngram_static.tokens [j-ngram_start_static] = get_token (inp, draft, j);
2027
2020
}
2028
2021
llama_ngram_cache::iterator part_static_it = nc_static.find (ngram_static);
2029
2022
llama_ngram_cache_part part_static;
@@ -2035,11 +2028,9 @@ void llama_ngram_cache_draft(
2035
2028
std::vector<llama_ngram> ngrams_cd;
2036
2029
for (int ngram_size_cd = ngram_min; ngram_size_cd <= ngram_max; ++ngram_size_cd) {
2037
2030
const int ngram_start_cd = inp_size-ngram_size_cd + draft.size ()-1 ;
2038
- llama_ngram ngram_cd = get_token (inp, draft, ngram_start_cd);
2039
- for (int j = ngram_start_cd+1 ; j < ngram_start_cd + ngram_size_cd; ++j) {
2040
- const llama_ngram token = get_token (inp, draft, j);
2041
- ngram_cd <<= 16 ;
2042
- ngram_cd |= token;
2031
+ llama_ngram ngram_cd;
2032
+ for (int j = ngram_start_cd; j < ngram_start_cd + ngram_size_cd; ++j) {
2033
+ ngram_cd.tokens [j-ngram_start_cd] = get_token (inp, draft, j);
2043
2034
}
2044
2035
ngrams_cd.push_back (ngram_cd);
2045
2036
}
0 commit comments