Skip to content

Commit 2238915

Browse files
committed
llama : write zeros by seeking in quantize -> 0.13% faster
1 parent e0680ac commit 2238915

File tree

1 file changed

+2
-9
lines changed

1 file changed

+2
-9
lines changed

llama.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,6 @@ void replace_all(std::string & s, const std::string & search, const std::string
 125  #include <hbwmalloc.h>
 126  #endif
 127
-128  static void zeros(std::ofstream & file, size_t n) {
-129      char zero = 0;
-130      for (size_t i = 0; i < n; ++i) {
-131          file.write(&zero, 1);
-132      }
-133  }
-134
 128  LLAMA_ATTRIBUTE_FORMAT(1, 2)
 129  static std::string format(const char * fmt, ...) {
 130      va_list ap;
@@ -4922,7 +4915,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 4915      LLAMA_LOG_INFO("%s: meta size = %zu bytes\n", __func__, meta_size);

 4916      // placeholder for the meta data
-4925      ::zeros(fout, meta_size);
+4918      fout.seekp(meta_size, std::ios_base::beg);

 4920      for (int i = 0; i < ml->n_tensors; ++i) {
 4921          struct ggml_tensor * tensor = ml->get_tensor_meta(i);
@@ -5053,7 +5046,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s

 5047      // write tensor data + padding
 5048      fout.write((const char *) new_data, new_size);
-5056      zeros(fout, GGML_PAD(new_size, align) - new_size);
+5049      fout.seekp(GGML_PAD(new_size, align) - new_size, std::ios_base::cur);
 5050  }

 5052      // go back to beginning of file and write the updated meta data

0 commit comments

Comments
 (0)