File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -157,8 +157,8 @@ struct common_params_sampling {
157
157
158
158
struct common_params_speculative {
159
159
int32_t n_ctx = 4096; // draft context size
160
- int32_t n_max = 5; // maximum number of tokens to draft during speculative decoding
161
- int32_t n_min = 0; // minimum number of draft tokens to use for speculative decoding
160
+ int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding
161
+ int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding
162
162
int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
163
163
float p_split = 0.1f; // speculative decoding split probability
164
164
float p_min = 0.9f; // minimum speculative decoding probability (greedy)
You can’t perform that action at this time.
0 commit comments