Skip to content

Commit 2238915

Browse files
committed
llama : write zeros by seeking in quantize -> 0.13% faster
1 parent e0680ac commit 2238915

File tree

1 file changed

+2
-9
lines changed

1 file changed

+2
-9
lines changed

llama.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,6 @@ void replace_all(std::string & s, const std::string & search, const std::string
 125  #include <hbwmalloc.h>
 126  #endif
 127
-128  static void zeros(std::ofstream & file, size_t n) {
-129      char zero = 0;
-130      for (size_t i = 0; i < n; ++i) {
-131          file.write(&zero, 1);
-132      }
-133  }
-134
 128  LLAMA_ATTRIBUTE_FORMAT(1, 2)
 129  static std::string format(const char * fmt, ...) {
 130      va_list ap;
@@ -4922,7 +4915,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 4915      LLAMA_LOG_INFO("%s: meta size = %zu bytes\n", __func__, meta_size);

 4916      // placeholder for the meta data
-4925      ::zeros(fout, meta_size);
+4918      fout.seekp(meta_size, std::ios_base::beg);

 4920      for (int i = 0; i < ml->n_tensors; ++i) {
 4921          struct ggml_tensor * tensor = ml->get_tensor_meta(i);
@@ -5053,7 +5046,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s

 5047      // write tensor data + padding
 5048      fout.write((const char *) new_data, new_size);
-5056      zeros(fout, GGML_PAD(new_size, align) - new_size);
+5049      fout.seekp(GGML_PAD(new_size, align) - new_size, std::ios_base::cur);
 5050  }

 5052      // go back to beginning of file and write the updated meta data

0 commit comments

Comments
 (0)