
Commit 512705e

llama : introduce pread_raw and pwrite_raw

1 parent 116e945 commit 512705e
File tree: 1 file changed, +65 -35 lines

llama.cpp (65 additions, 35 deletions)
@@ -547,6 +547,36 @@ struct llama_file {
         }
     }

+    void pread_raw(void * ptr, size_t len, int64_t offset) const {
+#ifndef _WIN32
+        int fd = fileno(fp);
+        ssize_t nread;
+        while (len) {
+            nread = pread(fd, ptr, len, off_t(offset));
+            if (nread < 0) {
+                throw std::runtime_error(format("read error: %s", strerror(errno)));
+            }
+            if (nread == 0) {
+                throw std::runtime_error("unexpectedly reached end of file");
+            }
+            len -= nread;
+            offset += nread;
+        }
+#else
+        if (len == 0) { return; }
+        HANDLE handle = (HANDLE)_get_osfhandle(_fileno(fp));
+        DWORD nread;
+        OVERLAPPED overlapped = {};
+        overlapped.Offset = DWORD(offset);
+        overlapped.OffsetHigh = DWORD(offset << 32);
+        bool res = ReadFile(handle, ptr, len, &nread, &overlapped);
+        if (!res) {
+            auto error = GetLastError();
+            throw std::runtime_error(format("ReadFile failed: %s", llama_format_win_err(error).c_str()));
+        }
+#endif
+    }
+
     uint32_t read_u32() const {
         uint32_t ret;
         read_raw(&ret, sizeof(ret));
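The Win32 branch positions the read through the OVERLAPPED structure rather than the file pointer. For reference, ReadFile/WriteFile take the 64-bit position split across two 32-bit fields: Offset carries the low-order half and OffsetHigh the high-order half. A standalone sketch of that split, not part of the commit:

```cpp
// Standalone sketch, not part of the commit: splitting a 64-bit file offset
// across the two 32-bit OVERLAPPED fields used by ReadFile/WriteFile.
#include <windows.h>
#include <cstdint>

static void set_overlapped_offset(OVERLAPPED & ov, int64_t offset) {
    ov.Offset     = (DWORD) (offset & 0xFFFFFFFFu);  // low-order 32 bits
    ov.OffsetHigh = (DWORD) (offset >> 32);          // high-order 32 bits
}
```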
@@ -564,6 +594,32 @@ struct llama_file {
         }
     }

+    void pwrite_raw(const void * ptr, size_t len, int64_t offset) const {
+#ifndef _WIN32
+        int fd = fileno(fp);
+        ssize_t nwrite;
+        while (len) {
+            nwrite = pwrite(fd, ptr, len, off_t(offset));
+            if (nwrite < 0) {
+                throw std::runtime_error(format("write error: %s", strerror(errno)));
+            }
+            len -= nwrite;
+            offset += nwrite;
+        }
+#else
+        auto * handle = (HANDLE)_get_osfhandle(_fileno(fp));
+        DWORD nwrite;
+        OVERLAPPED overlapped = {};
+        overlapped.Offset = DWORD(offset);
+        overlapped.OffsetHigh = DWORD(offset << 32);
+        bool res = WriteFile(handle, ptr, len, &nwrite, &overlapped);
+        if (!res) {
+            auto error = GetLastError();
+            throw std::runtime_error(format("WriteFile failed: %s", llama_format_win_err(error).c_str()));
+        }
+#endif
+    }
+
     void write_u32(std::uint32_t val) const {
         write_raw(&val, sizeof(val));
     }
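Together the two helpers give llama_file positional I/O at an explicit offset: on the POSIX path, pread/pwrite never move the descriptor's file offset, and the loops retry until the full length has been transferred. A hypothetical usage sketch (file names, buffer size, and offset are invented; llama_file is the struct defined in this file):

```cpp
// Hypothetical usage sketch (paths, sizes, and offsets are invented):
// copy one byte range between two files without relying on either file cursor.
llama_file src("model-f16.gguf",  "rb");
llama_file dst("model-q4_0.gguf", "wb");

std::vector<uint8_t> buf(1u << 20);   // 1 MiB scratch buffer
const int64_t offset = 4096;          // arbitrary, identical position in both files

src.pread_raw (buf.data(), buf.size(), offset);   // read at offset; POSIX cursor untouched
dst.pwrite_raw(buf.data(), buf.size(), offset);   // write at offset; POSIX cursor untouched
```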
@@ -1446,16 +1502,7 @@ struct llama_model_loader {
             if (use_mmap) {
                 cur->data = (uint8_t *) mapping->addr + offs;
             } else {
-#ifndef _WIN32
-                pread(fileno(file.fp), cur->data, ggml_nbytes(cur), offs);
-#else
-                auto * handle = (HANDLE)_get_osfhandle(_fileno(file.fp));
-                DWORD nread;
-                OVERLAPPED overlapped = {};
-                overlapped.Offset = DWORD(offs);
-                overlapped.OffsetHigh = DWORD(offs << 32);
-                ReadFile(handle, cur->data, ggml_nbytes(cur), &nread, &overlapped);
-#endif
+                file.pread_raw(cur->data, ggml_nbytes(cur), offs);
             }
         }

@@ -4916,32 +4963,26 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         gguf_add_tensor(ctx_out, meta);
     }

-    auto * fout = fopen(fname_out.c_str(), "wb");
+    auto fout = llama_file(fname_out.c_str(), "wb");

     const size_t meta_size = gguf_get_meta_size(ctx_out);

     LLAMA_LOG_INFO("%s: meta size = %zu bytes\n", __func__, meta_size);

     // placeholder for the meta data
-    fseek(fout, meta_size, SEEK_SET);
+    fout.seek(meta_size, SEEK_SET);

     std::vector<std::vector<no_init<float>>> f32_conv_buf_pool(nthreads2);
     std::mutex log_mutex;

-#ifndef _WIN32
-    using off_type = off_t;
-#else
-    using off_type = LONGLONG;
-#endif
-
     std::vector<ggml_type> quant_tensor_types;
     std::vector<size_t> quant_tensor_sizes;
-    std::vector<off_type> quant_tensor_offsets;
+    std::vector<int64_t> quant_tensor_offsets;
     quant_tensor_types.reserve(ml->n_tensors);
     quant_tensor_sizes.reserve(ml->n_tensors);
     quant_tensor_offsets.reserve(ml->n_tensors);

-    off_type fpos = meta_size;
+    int64_t fpos = meta_size;
     for (int i = 0; i < ml->n_tensors; ++i) {
         struct ggml_tensor * tensor = ml->get_tensor_meta(i);
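With the platform-specific off_type alias gone, tensor offsets are plain int64_t values computed once, up front, before any quantization work starts. A sketch of that precomputation pattern (the per-tensor size logic is truncated in this hunk, so the size call below is a placeholder):

```cpp
// Sketch of the offset precomputation (placeholder size call; the real loop
// decides the quantized type and size per tensor before recording the offset).
int64_t fpos = meta_size;
for (int i = 0; i < ml->n_tensors; ++i) {
    struct ggml_tensor * tensor = ml->get_tensor_meta(i);
    const size_t new_size = ggml_nbytes(tensor);   // placeholder: the actual quantized size goes here
    quant_tensor_sizes.push_back(new_size);
    quant_tensor_offsets.push_back(fpos);
    fpos += (int64_t) new_size;                    // next tensor starts right after this one
}
```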

@@ -5094,16 +5135,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                 log_lock.unlock();

                 // write tensor data
-#ifndef _WIN32
-                pwrite(fileno(fout), new_data, new_size, quant_tensor_offsets[i]);
-#else
-                auto* handle = (HANDLE)_get_osfhandle(_fileno(fout));
-                DWORD nwrite;
-                OVERLAPPED overlapped = {};
-                overlapped.Offset = DWORD(quant_tensor_offsets[i]);
-                overlapped.OffsetHigh = DWORD(quant_tensor_offsets[i] << 32);
-                WriteFile(handle, new_data, new_size, &nwrite, &overlapped);
-#endif
+                fout.pwrite_raw(new_data, new_size, quant_tensor_offsets[i]);
             };

     {
@@ -5115,18 +5147,16 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     }

     // write final padding
-    fseek(fout, fpos, SEEK_SET);
+    fout.seek(fpos, SEEK_SET);

     // go back to beginning of file and write the updated meta data
     {
-        rewind(fout);
+        fout.seek(0, SEEK_SET);
         std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
         gguf_get_meta_data(ctx_out, data.data());
-        fwrite(data.data(), data.size(), 1, fout);
+        fout.write_raw(data.data(), data.size());
     }

-    fclose(fout);
-
     gguf_free(ctx_out);

     LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
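The point of the refactor is visible in these last hunks: every tensor's destination offset is known before quantization begins, so workers can finish tensors in any order and drop each result into place with pwrite_raw instead of serializing on a shared file cursor. A simplified illustration of that pattern (thread striping and buffer layout are invented, not the commit's exact code):

```cpp
// Simplified illustration, not the commit's exact code: workers write quantized
// tensors at their precomputed offsets; positional writes need no shared cursor.
#include <cstdint>
#include <thread>
#include <vector>

void write_quantized_parallel(llama_file & fout,
                              const std::vector<std::vector<uint8_t>> & quantized,  // one buffer per tensor (hypothetical)
                              const std::vector<int64_t> & offsets,
                              int nthread) {
    std::vector<std::thread> workers;
    for (int t = 0; t < nthread; ++t) {
        workers.emplace_back([&, t]() {
            // static striping: worker t handles tensors t, t + nthread, t + 2*nthread, ...
            for (size_t i = t; i < quantized.size(); i += nthread) {
                fout.pwrite_raw(quantized[i].data(), quantized[i].size(), offsets[i]);
            }
        });
    }
    for (auto & w : workers) {
        w.join();
    }
}
```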
