@@ -98,8 +98,6 @@ int main(int argc, char ** argv) {
 
     // Add a space in front of the first character to match OG llama tokenizer behavior
     params.prompt.insert(0, 1, ' ');
-    // tokenize the prompt
-    std::vector<gpt_vocab::id> embd_inp = llama_tokenize_text(ctx, params.prompt);
 
     // prefix & suffix for instruct mode
     const std::vector<gpt_vocab::id> inp_pfx = ::llama_tokenize(vocab, "\n\n### Instruction:\n\n", true);
@@ -161,15 +159,15 @@ int main(int argc, char ** argv) {
         printf(ANSI_COLOR_YELLOW);
     }
 
-    if (!llama_injest_input(ctx, params.prompt))
+    if (!llama_ingest_input(ctx, params.prompt))
     {
-        fprintf(stderr, "Failed to injest prompt\n");
+        fprintf(stderr, "Failed to ingest prompt\n");
         return 1;
     };
 
     // display text
     input_noecho = false;
-    const std::vector<gpt_vocab::id>& embd = llama_context_get_embd(ctx);
+    const std::vector<gpt_vocab::id>& embd = llama_context_get_embedding(ctx);
     if (!input_noecho) {
         for (auto id : embd) {
             printf("%s", vocab.id_to_token[id].c_str());
@@ -183,9 +181,9 @@ int main(int argc, char ** argv) {
 
     const std::vector<gpt_vocab::id>& last_n_tokens = llama_context_get_last_n_tokens(ctx);
 
-    while (llama_context_not_finished(ctx) > 0) {
+    while (llama_context_is_finished(ctx) != true) {
         gpt_vocab::id model_output = 0;
-        bool response = llama_inference(ctx, model_output);
+        bool response = llama_infer(ctx, model_output);
         if (response) {
             printf("%s", vocab.id_to_token[model_output].c_str());
             fflush(stdout);
@@ -195,7 +193,6 @@ int main(int argc, char ** argv) {
             printf(ANSI_COLOR_RESET);
         }
 
-
         // in interactive mode, and not currently processing queued inputs;
         // check if we should prompt the user for more
         if (params.interactive) {
@@ -228,7 +225,7 @@ int main(int argc, char ** argv) {
                     line.pop_back(); // Remove the continue character
                 }
                 // Do not clear existing context in interactive mode
-                llama_init_context_with_prompt(ctx, buf, false);
+                llama_update_context_with_prompt(ctx, buf, false);
             }
 
             remaining_tokens -= line_inp.size();