@@ -61,7 +61,7 @@ struct llama_server_context
        std::vector<llama_token> prompt_tokens = ::llama_tokenize(ctx, params.prompt, true);
        // compare the evaluated prompt with the new prompt
        int new_prompt_len = 0;
-       for (int i = 0; i < prompt_tokens.size(); i++) {
+       for (unsigned int i = 0; i < prompt_tokens.size(); i++) {
            if (i < processed_tokens.size() &&
                processed_tokens[i] == prompt_tokens[i])
            {
@@ -71,7 +71,7 @@ struct llama_server_context
            {
                embd_inp.push_back(prompt_tokens[i]);
                if (new_prompt_len == 0) {
-                   if (i - 1 < n_past) {
+                   if (((int)i) - 1 < (int)n_past) {
                        processed_tokens.erase(processed_tokens.begin() + i, processed_tokens.end());
                    }
                    // Evaluate the new fragment prompt from the last token processed.
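The index changes in the two hunks above follow the usual fix for `-Wsign-compare`: `std::vector::size()` returns an unsigned type, so comparing it against a signed `int` index (or mixing the index with `n_past` in a subtraction) draws a warning under `-Wall -Wextra`. A minimal standalone sketch of the same pattern, not taken from server.cpp (the vector and its contents are illustrative):

```cpp
#include <cstdio>
#include <vector>

int main() {
    std::vector<int> tokens = {10, 20, 30};

    // A signed `int i` compared against tokens.size() would trigger
    // -Wsign-compare; an unsigned index (or an explicit cast on both
    // sides, as the patch does for i - 1 < n_past) keeps the comparison
    // between operands of the same signedness.
    for (unsigned int i = 0; i < tokens.size(); i++) {
        std::printf("%u -> %d\n", i, tokens[i]);
    }
    return 0;
}
```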
@@ -136,7 +136,7 @@ struct llama_server_context
            {
                // out of user input, sample next token
                const float temp = params.temp;
-               const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k;
+               // const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k;
                const float top_p = params.top_p;
                const float tfs_z = params.tfs_z;
                const float typical_p = params.typical_p;
@@ -306,12 +306,12 @@ struct llama_server_context
        // Avoid add the no show words to the response
        for (std::vector<llama_token> word_tokens : no_show_words)
        {
-           int match_token = 1;
+           unsigned int match_token = 1;
            if (tokens_predicted.front() == word_tokens.front())
            {
                bool execute_matching = true;
                if (tokens_predicted.size() > 1) { // if previus tokens had been tested
-                   for (int i = 1; i < word_tokens.size(); i++)
+                   for (unsigned int i = 1; i < word_tokens.size(); i++)
                    {
                        if (i >= tokens_predicted.size()) {
                            match_token = i;
@@ -601,7 +601,7 @@ int main(int argc, char **argv)
601
601
602
602
Server svr;
603
603
604
- svr.Get (" /" , [](const Request &req , Response &res)
604
+ svr.Get (" /" , [](const Request &, Response &res)
605
605
{ res.set_content (" <h1>llama.cpp server works</h1>" , " text/html" ); });
606
606
607
607
svr.Post (" /completion" , [&llama](const Request &req, Response &res)
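Leaving the unused `Request` parameter unnamed in the `/` handler is the standard way to silence `-Wunused-parameter` without changing the handler's signature. A small sketch against cpp-httplib showing the same idiom; the route, body, and listen address are illustrative, not part of this patch:

```cpp
#include "httplib.h"

int main() {
    httplib::Server svr;

    // The request object is not needed here, so the parameter is left
    // unnamed; the lambda still matches httplib's handler signature.
    svr.Get("/health", [](const httplib::Request &, httplib::Response &res) {
        res.set_content("ok", "text/plain");
    });

    svr.listen("127.0.0.1", 8080);
    return 0;
}
```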
@@ -649,7 +649,7 @@ int main(int argc, char **argv)
                    {"tokens_predicted", llama.num_tokens_predicted}};
                return res.set_content(data.dump(), "application/json");
            }
-           catch (json::exception e)
+           catch (json::exception const &e)
            {
                // Some tokens have bad UTF-8 strings, the json parser is very sensitive
                json data = {
@@ -701,7 +701,7 @@ int main(int argc, char **argv)
                    {"content", result},
                    {"stop", !llama.has_next_token}};
                return res.set_content(data.dump(), "application/json");
-           } catch (json::exception e) {
+           } catch (json::exception const &e) {
                // Some tokens have bad UTF-8 strings, the json parser is very sensitive
                json data = {
                    {"content", ""},