Skip to content

Commit ca74a33

Browse files
committed
speculative : fix probability output for drafts
ggml-ci
1 parent 7430eba commit ca74a33

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

examples/speculative/speculative.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,9 @@ int main(int argc, char ** argv) {
3737
return 1;
3838
}
3939

40+
// for probabilities to be computed even with temp = 0
41+
params.sparams.n_probs = 16;
42+
4043
// max number of parallel drafting sequences (i.e. tree branches)
4144
const int n_seq_dft = params.n_parallel;
4245

@@ -182,10 +185,6 @@ int main(int argc, char ** argv) {
182185
// draft sequence data
183186
std::vector<seq_draft> drafts(n_seq_dft);
184187

185-
if (params.sparams.temp == 0) {
186-
params.sparams.temp = -1.0f; // force greedy sampling with probs for the draft model
187-
}
188-
189188
for (int s = 0; s < n_seq_dft; ++s) {
190189
// allocate llama_sampling for each draft sequence
191190
drafts[s].smpl = llama_sampling_init(model_dft, params.sparams);

0 commit comments

Comments (0)