@@ -1814,12 +1814,12 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
1814
1814
1815
1815
// combine tokens with common prefix
1816
1816
for (size_t i0 = 0 ; i0 < cur_p->size ; ++i0) {
1817
- for (size_t j0 = 0 ; j0 < cur_p->size ; ++j0 ) {
1817
+ for (size_t i1 = 0 ; i1 < cur_p->size ; ++i1 ) {
1818
1818
if (cur_p->data [i0].logit == -INFINITY) {
1819
1819
break ;
1820
1820
}
1821
1821
1822
- if (i0 == j0 || cur_p->data [j0 ].logit == -INFINITY) {
1822
+ if (i0 == i1 || cur_p->data [i1 ].logit == -INFINITY) {
1823
1823
continue ;
1824
1824
}
1825
1825
@@ -1830,20 +1830,20 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
1830
1830
assert (len0 > 0 );
1831
1831
}
1832
1832
1833
- int len1 = llama_token_to_piece_impl (*ctx->vocab , cur_p->data [j0 ].id , ctx->buf1 .data (), ctx->buf1 .size (), 0 , false );
1833
+ int len1 = llama_token_to_piece_impl (*ctx->vocab , cur_p->data [i1 ].id , ctx->buf1 .data (), ctx->buf1 .size (), 0 , false );
1834
1834
if (len1 < 0 ) {
1835
1835
ctx->buf1 .resize (len1);
1836
- len1 = llama_token_to_piece_impl (*ctx->vocab , cur_p->data [j0 ].id , ctx->buf1 .data (), ctx->buf1 .size (), 0 , false );
1836
+ len1 = llama_token_to_piece_impl (*ctx->vocab , cur_p->data [i1 ].id , ctx->buf1 .data (), ctx->buf1 .size (), 0 , false );
1837
1837
assert (len1 > 0 );
1838
1838
}
1839
1839
1840
- // token i0 is a prefix of token j0
1840
+ // token i0 is a prefix of token i1
1841
1841
if (len0 > 0 && len0 <= len1 && memcmp (ctx->buf0 .data (), ctx->buf1 .data (), len0) == 0 ) {
1842
1842
int dst = i0;
1843
- int src = j0 ;
1843
+ int src = i1 ;
1844
1844
1845
1845
// merge into the token with higher probability
1846
- if (cur_p->data [j0 ].p > cur_p->data [i0].p ) {
1846
+ if (cur_p->data [i1 ].p > cur_p->data [i0].p ) {
1847
1847
std::swap (dst, src);
1848
1848
}
1849
1849
0 commit comments