We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1193778, commit b96f9af (Copy full SHA for b96f9af)
common/common.h
@@ -73,7 +73,6 @@ struct gpt_params {
73
int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
74
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
75
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
76
- int32_t n_beams = 0; // if non-zero then use beam search of given width.
77
int32_t grp_attn_n = 1; // group-attention factor
78
int32_t grp_attn_w = 512; // group-attention width
79
int32_t n_print = -1; // print token count every n tokens (-1 = disabled)
0 commit comments