We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1e6f655 commit 3195854 Copy full SHA for 3195854
include/llama.h
@@ -345,7 +345,7 @@ extern "C" {
345
int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
346
enum llama_ftype ftype; // quantize to this llama_ftype
347
enum ggml_type output_tensor_type; // output tensor type
348
- enum ggml_type token_embedding_type; // itoken embeddings tensor type
+ enum ggml_type token_embedding_type; // token embeddings tensor type
349
bool allow_requantize; // allow quantizing non-f32/f16 tensors
350
bool quantize_output_tensor; // quantize output.weight
351
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
0 commit comments