@@ -547,6 +547,36 @@ struct llama_file {
         }
     }

+    void pread_raw(void * ptr, size_t len, int64_t offset) const {
+#ifndef _WIN32
+        int fd = fileno(fp);
+        ssize_t nread;
+        while (len) {
+            nread = pread(fd, ptr, len, off_t(offset));
+            if (nread < 0) {
+                throw std::runtime_error(format("read error: %s", strerror(errno)));
+            }
+            if (nread == 0) {
+                throw std::runtime_error("unexpectedly reached end of file");
+            }
+            len    -= nread;
+            offset += nread;
+        }
+#else
+        if (len == 0) { return; }
+        HANDLE handle = (HANDLE) _get_osfhandle(_fileno(fp));
+        DWORD nread;
+        OVERLAPPED overlapped = {};
+        overlapped.Offset     = DWORD(offset);
+        overlapped.OffsetHigh = DWORD(offset >> 32);
+        bool res = ReadFile(handle, ptr, len, &nread, &overlapped);
+        if (!res) {
+            auto error = GetLastError();
+            throw std::runtime_error(format("ReadFile failed: %s", llama_format_win_err(error).c_str()));
+        }
+#endif
+    }
+
     uint32_t read_u32() const {
         uint32_t ret;
         read_raw(&ret, sizeof(ret));
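A note on the Windows branch above: OVERLAPPED carries the 64-bit read offset as two 32-bit halves, and the high half has to be the offset shifted *right* by 32. `DWORD(offset << 32)` would always truncate to zero and silently pin every access to the first 4 GiB of the file. A self-contained sketch of the split (`offset_pair` and `split_offset` are stand-ins for illustration, not Win32 types):

```cpp
#include <cstdint>

// Stand-in for the Offset/OffsetHigh pair inside the Win32 OVERLAPPED struct.
struct offset_pair {
    uint32_t lo;
    uint32_t hi;
};

// How pread_raw/pwrite_raw must fill OVERLAPPED: the low word is a plain
// truncation, the high word is a right shift of the 64-bit offset.
static offset_pair split_offset(int64_t offset) {
    offset_pair p;
    p.lo = (uint32_t) (offset & 0xFFFFFFFFu);        // low 32 bits
    p.hi = (uint32_t) ((uint64_t) offset >> 32);     // high 32 bits
    return p;
}
```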
@@ -564,6 +594,32 @@ struct llama_file {
         }
     }

+    void pwrite_raw(const void * ptr, size_t len, int64_t offset) const {
+#ifndef _WIN32
+        int fd = fileno(fp);
+        ssize_t nwrite;
+        while (len) {
+            nwrite = pwrite(fd, ptr, len, off_t(offset));
+            if (nwrite < 0) {
+                throw std::runtime_error(format("write error: %s", strerror(errno)));
+            }
+            len    -= nwrite;
+            offset += nwrite;
+        }
+#else
+        auto * handle = (HANDLE) _get_osfhandle(_fileno(fp));
+        DWORD nwrite;
+        OVERLAPPED overlapped = {};
+        overlapped.Offset     = DWORD(offset);
+        overlapped.OffsetHigh = DWORD(offset >> 32);
+        bool res = WriteFile(handle, ptr, len, &nwrite, &overlapped);
+        if (!res) {
+            auto error = GetLastError();
+            throw std::runtime_error(format("WriteFile failed: %s", llama_format_win_err(error).c_str()));
+        }
+#endif
+    }
+
     void write_u32(std::uint32_t val) const {
         write_raw(&val, sizeof(val));
     }
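With both helpers in place, llama_file supports positional I/O alongside the existing streaming read_raw()/write_raw(). An illustrative round trip (the path and offsets are made up; llama_file, pread_raw and pwrite_raw are the ones from this patch):

```cpp
#include <cstdint>
#include <vector>

void example() {
    llama_file f("model.gguf", "rb+");           // hypothetical file
    std::vector<uint8_t> buf(1024);
    f.pread_raw(buf.data(), buf.size(), 4096);   // read 1 KiB at offset 4096
    f.pwrite_raw(buf.data(), buf.size(), 8192);  // copy it to offset 8192
    // On the POSIX path neither call moves the stdio position used by
    // read_raw()/write_raw(). On Windows a synchronous handle's file
    // pointer may still advance, so avoid interleaving positioned and
    // sequential I/O there.
}
```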
@@ -1446,16 +1502,7 @@ struct llama_model_loader {
         if (use_mmap) {
             cur->data = (uint8_t *) mapping->addr + offs;
         } else {
-#ifndef _WIN32
-            pread(fileno(file.fp), cur->data, ggml_nbytes(cur), offs);
-#else
-            auto * handle = (HANDLE) _get_osfhandle(_fileno(file.fp));
-            DWORD nread;
-            OVERLAPPED overlapped = {};
-            overlapped.Offset = DWORD(offs);
-            overlapped.OffsetHigh = DWORD(offs << 32);
-            ReadFile(handle, cur->data, ggml_nbytes(cur), &nread, &overlapped);
-#endif
+            file.pread_raw(cur->data, ggml_nbytes(cur), offs);
         }
     }

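Replacing the inline pread/ReadFile with file.pread_raw(...) keeps the loader's behavior but centralizes error handling: the old inline calls ignored short reads and failures entirely. Because a positional read carries its own offset instead of sharing the stdio file position, the helper is also safe to call from several threads at once, which is what the quantize path below relies on. A hedged sketch of that multi-threaded pattern (tensor_span and the scheduling are illustrative, not from the patch):

```cpp
#include <cstddef>
#include <cstdint>
#include <thread>
#include <vector>

// Hypothetical descriptor: where a tensor's bytes live in the file.
struct tensor_span {
    void *  dst;     // destination buffer
    size_t  nbytes;  // tensor size in bytes
    int64_t offs;    // absolute file offset
};

// Each worker reads a disjoint strided slice of the tensor list; this is
// safe because pread_raw never touches a shared file position (POSIX path).
static void load_all(llama_file & file, const std::vector<tensor_span> & spans, int nthreads) {
    std::vector<std::thread> workers;
    for (int t = 0; t < nthreads; ++t) {
        workers.emplace_back([&, t] {
            for (size_t i = t; i < spans.size(); i += (size_t) nthreads) {
                file.pread_raw(spans[i].dst, spans[i].nbytes, spans[i].offs);
            }
        });
    }
    for (auto & w : workers) { w.join(); }
}
```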
@@ -4916,32 +4963,26 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         gguf_add_tensor(ctx_out, meta);
     }

-    auto * fout = fopen(fname_out.c_str(), "wb");
+    auto fout = llama_file(fname_out.c_str(), "wb");

     const size_t meta_size = gguf_get_meta_size(ctx_out);

     LLAMA_LOG_INFO("%s: meta size = %zu bytes\n", __func__, meta_size);

     // placeholder for the meta data
-    fseek(fout, meta_size, SEEK_SET);
+    fout.seek(meta_size, SEEK_SET);

     std::vector<std::vector<no_init<float>>> f32_conv_buf_pool(nthreads2);
     std::mutex log_mutex;

-#ifndef _WIN32
-    using off_type = off_t;
-#else
-    using off_type = LONGLONG;
-#endif
-
     std::vector<ggml_type> quant_tensor_types;
     std::vector<size_t> quant_tensor_sizes;
-    std::vector<off_type> quant_tensor_offsets;
+    std::vector<int64_t> quant_tensor_offsets;
     quant_tensor_types.reserve(ml->n_tensors);
     quant_tensor_sizes.reserve(ml->n_tensors);
     quant_tensor_offsets.reserve(ml->n_tensors);

-    off_type fpos = meta_size;
+    int64_t fpos = meta_size;
     for (int i = 0; i < ml->n_tensors; ++i) {
         struct ggml_tensor * tensor = ml->get_tensor_meta(i);

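The bookkeeping introduced here is the heart of the change: a first pass assigns every tensor a fixed destination offset, with fpos starting at meta_size and accumulating each tensor's on-disk size, so the workers can later pwrite_raw their results in any order. A minimal sketch of that offset assignment (the function and parameter names are mine; it assumes padded on-disk sizes are known up front, as quant_tensor_sizes implies):

```cpp
#include <cstdint>
#include <vector>

// First pass: give every tensor a fixed slot in the output file so that
// out-of-order, parallel writes land in the right place.
std::vector<int64_t> assign_offsets(const std::vector<int64_t> & padded_sizes, size_t meta_size) {
    std::vector<int64_t> offsets(padded_sizes.size());
    int64_t fpos = (int64_t) meta_size;      // tensor data starts after the meta block
    for (size_t i = 0; i < padded_sizes.size(); ++i) {
        offsets[i] = fpos;
        fpos += padded_sizes[i];
    }
    // fpos now equals the final file size; the quantize path seeks there to
    // materialize the padding, then rewrites the meta block at offset 0.
    return offsets;
}
```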
@@ -5094,16 +5135,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         log_lock.unlock();

         // write tensor data
-#ifndef _WIN32
-        pwrite(fileno(fout), new_data, new_size, quant_tensor_offsets[i]);
-#else
-        auto * handle = (HANDLE) _get_osfhandle(_fileno(fout));
-        DWORD nwrite;
-        OVERLAPPED overlapped = {};
-        overlapped.Offset = DWORD(quant_tensor_offsets[i]);
-        overlapped.OffsetHigh = DWORD(quant_tensor_offsets[i] << 32);
-        WriteFile(handle, new_data, new_size, &nwrite, &overlapped);
-#endif
+        fout.pwrite_raw(new_data, new_size, quant_tensor_offsets[i]);
     };

     {
@@ -5115,18 +5147,16 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     }

     // write final padding
-    fseek(fout, fpos, SEEK_SET);
+    fout.seek(fpos, SEEK_SET);

     // go back to beginning of file and write the updated meta data
     {
-        rewind(fout);
+        fout.seek(0, SEEK_SET);
         std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
         gguf_get_meta_data(ctx_out, data.data());
-        fwrite(data.data(), data.size(), 1, fout);
+        fout.write_raw(data.data(), data.size());
     }

-    fclose(fout);
-
     gguf_free(ctx_out);

     LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
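The dropped fclose() in this hunk is not an oversight: fout is now a llama_file, and that struct (defined earlier in the same file) closes its FILE * in its destructor, so cleanup happens automatically when fout goes out of scope. Roughly, the shape of the existing destructor:

```cpp
// Existing llama_file destructor (paraphrased from earlier in llama.cpp):
// RAII close replaces the manual fclose() in the quantize path.
~llama_file() {
    if (fp) {
        std::fclose(fp);
    }
}
```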