@@ -66,19 +66,19 @@ class Llama {
66
66
String loraBase;
67
67
List <(String , double )> loraAdapters;
68
68
69
- // late SamplingContext sampling;
70
-
71
69
/// Constructor for Llama.
72
70
///
73
71
/// Loads the model and context based on provided model and context parameters.
74
72
Llama (String modelPath,
75
73
[ModelParams ? modelParams,
76
74
ContextParams ? contextParams,
77
- this .samplingParams,
75
+ SamplingParams ? samplingParams,
76
+ // this.samplingParams,
78
77
this .loraBase = "" ,
79
78
this .loraAdapters = const []])
80
79
: modelParams = modelParams ?? ModelParams (),
81
- contextParams = contextParams ?? ContextParams () {
80
+ contextParams = contextParams ?? ContextParams (),
81
+ samplingParams = samplingParams ?? SamplingParams () {
82
82
lib.llama_backend_init (false );
83
83
llama_model_params modelParams = this .modelParams.get ();
84
84
@@ -118,8 +118,6 @@ class Llama {
118
118
}
119
119
}
120
120
malloc.free (cLoraBase);
121
-
122
- // sampling = SamplingContext(this);
123
121
}
124
122
125
123
/// Releases all resources associated with the Llama instance.
@@ -147,8 +145,8 @@ class Llama {
147
145
/// An exception is thrown if the required KV cache size exceeds the context's limit.
148
146
/// The function also initializes the batch for token processing.
149
147
setPrompt (String prompt) {
148
+ // context = lib.llama_new_context_with_model(model, contextParams.get());
150
149
tokensList = tokenize (prompt, true );
151
- // temporaryInvalidCChars = [];
152
150
153
151
if (length != - 1 ) {
154
152
int nCtx = lib.llama_n_ctx (context);
@@ -180,8 +178,6 @@ class Llama {
180
178
/// Returns a tuple with the generated text and a boolean indicating if the end-of-sequence token is reached.
181
179
/// An exception is thrown if llama_decode fails during processing.
182
180
(String , bool ) getNext () {
183
- samplingParams ?? = SamplingParams ();
184
-
185
181
Pointer <Int32 > newTokenId = calloc.allocate <Int32 >(sizeOf <Int32 >());
186
182
final nVocab = lib.llama_n_vocab (model);
187
183
final logits = lib.llama_get_logits_ith (context, batch.n_tokens - 1 );
@@ -202,33 +198,12 @@ class Llama {
202
198
..size = nVocab
203
199
..sorted = true ;
204
200
205
- /*
206
- final Pointer<llama_token> nativeLastTokens =
207
- malloc.allocate<llama_token>(sizeOf<llama_token>() * lastTokens.length);
208
- for (int i = 0; i < lastTokens.length; i++) {
209
- nativeLastTokens.elementAt(i).value = i;
210
- }
211
-
212
- Pointer<llama_sampling_params> sp = samplingParams!.get();
213
- lib.llama_sample_repetition_penalties(
214
- context,
215
- candidatesP,
216
- nativeLastTokens,
217
- sp.ref.penalty_last_n,
218
- sp.ref.penalty_repeat,
219
- sp.ref.penalty_freq,
220
- sp.ref.penalty_present);
221
- */
222
-
223
201
SamplingContext sampling = SamplingContext (this );
224
202
sampling.params = samplingParams;
225
203
226
- // int minKeep = max(1, samplingParams.nProbs);
227
- // sampling.tfsZ(candidatesP, minKeep, nVocab);
228
204
newTokenId.value = candidatesP.ref.data.elementAt (0 ).ref.id;
229
205
newTokenId.value = sampling.sample (newTokenId, null );
230
-
231
- // newTokenId.value = candidatesP.ref.data.elementAt(0).ref.id;
206
+ sampling.accept (newTokenId.value);
232
207
233
208
// newTokenId.value = lib.llama_sample_token_greedy(context, candidatesP);
234
209
// lastTokens.add(newTokenId);
@@ -239,13 +214,6 @@ class Llama {
239
214
240
215
sampling.dispose ();
241
216
242
- if (newTokenId.value == lib.llama_token_eos (model)) {
243
- int token = newTokenId.value;
244
- calloc.free (newTokenId);
245
- final newTokenStr = tokenToPiece (newTokenId.value);
246
- return (newTokenStr, token == lib.llama_token_eos (model));
247
- }
248
-
249
217
final newTokenStr = tokenToPiece (newTokenId.value);
250
218
251
219
batch.n_tokens = 0 ;
@@ -288,11 +256,8 @@ class Llama {
288
256
lastTokens.clear ();
289
257
lib.llama_kv_cache_clear (context);
290
258
batch.n_tokens = 0 ;
291
- tokensList.clear ();
292
- lastTokens.clear ();
293
259
cursor = 0 ;
294
260
decode = 0 ;
295
- // lib.llama
296
261
}
297
262
298
263
// Utility methods
0 commit comments