@@ -88,8 +88,6 @@ int main(int argc, char ** argv) {
 
     // Add a space in front of the first character to match OG llama tokenizer behavior
     params.prompt.insert(0, 1, ' ');
-    // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp = llama_tokenize_text(ctx, params.prompt);
 
     // tokenize the reverse prompt
     std::vector<gpt_vocab::id> antiprompt_inp = llama_tokenize_text(ctx, params.prompt);
@@ -140,15 +138,15 @@ int main(int argc, char ** argv) {
         printf(ANSI_COLOR_YELLOW);
     }
 
-    if (!llama_injest_input(ctx, params.prompt))
+    if (!llama_ingest_input(ctx, params.prompt))
     {
-        fprintf(stderr, "Failed to injest prompt\n");
+        fprintf(stderr, "Failed to ingest prompt\n");
         return 1;
     };
 
     // display text
     input_noecho = false;
-    const std::vector<gpt_vocab::id>& embd = llama_context_get_embd(ctx);
+    const std::vector<gpt_vocab::id>& embd = llama_context_get_embedding(ctx);
     if (!input_noecho) {
         for (auto id : embd) {
             printf("%s", vocab.id_to_token[id].c_str());
@@ -162,15 +160,14 @@ int main(int argc, char ** argv) {
 
     const std::vector<gpt_vocab::id>& last_n_tokens = llama_context_get_last_n_tokens(ctx);
 
-    while (llama_context_not_finished(ctx) > 0) {
+    while (llama_context_is_finished(ctx) != true) {
         gpt_vocab::id model_output = 0;
-        bool response = llama_inference(ctx, model_output);
+        bool response = llama_infer(ctx, model_output);
         if (response) {
             printf("%s", vocab.id_to_token[model_output].c_str());
             fflush(stdout);
         }
 
-
         // in interactive mode, and not currently processing queued inputs;
         // check if we should prompt the user for more
         if (params.interactive) {
@@ -204,7 +201,7 @@ int main(int argc, char ** argv) {
                 buf[n_read+1] = 0;
             }
             // Do not clear existing context in interactive mode
-            llama_init_context_with_prompt(ctx, buf, false);
+            llama_update_context_with_prompt(ctx, buf, false);
         }
 
         is_interacting = false;