Skip to content

Commit 18e4240

Browse files
authored
Implement load_into for Mmap Data Loader (#11654)
### Summary - Adds `load_into()` to support memory-mapped loading directly into the caller's buffer. - Enables copying a specific byte range from the file without creating an internal buffer. - Reuses input validation from `load()`. Fixes #11561 ### Test plan - Added unit tests to `MmapDataLoaderTest` for `load_into()` to validate copying for both aligned and offset data. - All Mmap Data Loader tests pass via: ``` ./build-ninja/extension/data_loader/test/extension_data_loader_test --gtest_filter='MmapDataLoaderTest.*' ``` - Full set of Data Loader tests pass via: ``` ./build-ninja/extension/data_loader/test/extension_data_loader_test ```
1 parent 1793bae commit 18e4240

File tree

3 files changed

+144
-4
lines changed

3 files changed

+144
-4
lines changed

extension/data_loader/mmap_data_loader.cpp

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,10 @@ void MunmapSegment(void* context, void* data, size_t size) {
150150
}
151151
} // namespace
152152

153-
Result<FreeableBuffer> MmapDataLoader::load(
154-
size_t offset,
155-
size_t size,
156-
ET_UNUSED const DataLoader::SegmentInfo& segment_info) const {
153+
/**
154+
* Validates that file read range is within bounds.
155+
*/
156+
Error MmapDataLoader::validate_input(size_t offset, size_t size) const {
157157
ET_CHECK_OR_RETURN_ERROR(
158158
// Probably had its value moved to another instance.
159159
fd_ >= 0,
@@ -173,6 +173,18 @@ Result<FreeableBuffer> MmapDataLoader::load(
173173
InvalidArgument,
174174
"Offset %zu too large for off_t",
175175
offset);
176+
return Error::Ok;
177+
}
178+
179+
Result<FreeableBuffer> MmapDataLoader::load(
180+
size_t offset,
181+
size_t size,
182+
ET_UNUSED const DataLoader::SegmentInfo& segment_info) const {
183+
// Ensure read range is valid.
184+
auto err = validate_input(offset, size);
185+
if (err != Error::Ok) {
186+
return err;
187+
}
176188

177189
// mmap() will fail if the size is zero.
178190
if (size == 0) {
@@ -267,5 +279,69 @@ Result<size_t> MmapDataLoader::size() const {
267279
return file_size_;
268280
}
269281

282+
/**
 * Copies `size` bytes of the file, starting at byte `offset`, into `buffer`.
 *
 * The pages covering the requested range are mapped read-only just long
 * enough to perform the copy, and are unmapped again before returning.
 *
 * @param[in] offset Byte offset into the file at which to start reading.
 * @param[in] size Number of bytes to copy. Zero is a no-op that returns
 *     Error::Ok once the buffer and range checks have passed.
 * @param[in] segment_info Unused.
 * @param[out] buffer Destination; must be non-null. `size` bytes are written
 *     with no capacity check, so the caller must provide at least that much.
 *
 * @retval Error::Ok The bytes were copied (or `size` was zero).
 * @retval Error::InvalidArgument `buffer` is null.
 * @retval Error::AccessFailed mmap() failed.
 * @returns Otherwise, whatever non-Ok error validate_input() reports for the
 *     requested range.
 */
Error MmapDataLoader::load_into(
    size_t offset,
    size_t size,
    ET_UNUSED const SegmentInfo& segment_info,
    void* buffer) const {
  ET_CHECK_OR_RETURN_ERROR(
      buffer != nullptr, InvalidArgument, "Buffer is null");

  // Ensure read range is valid.
  auto err = validate_input(offset, size);
  if (err != Error::Ok) {
    return err;
  }

  // Nothing to copy. This also keeps a zero size away from mmap(), which
  // would fail on it.
  if (size == 0) {
    return Error::Ok;
  }

  // Find the range of pages that covers the requested region.
  Range range =
      get_overlapping_pages(static_cast<uintptr_t>(offset), size, page_size_);

  size_t map_size = range.size;
  if (range.start + map_size > file_size_) {
    // Clamp to the end of the file.
    //
    // The Windows implementation of mmap uses CreateFileMapping which returns
    // error STATUS_SECTION_TOO_BIG (0xc0000040) if we try to map past the end
    // of the last page of a file mapped in as read-only.
    map_size = file_size_ - range.start;
  }

  // Map the pages read-only. MAP_PRIVATE vs. MAP_SHARED doesn't matter since
  // the data is read-only, but use PRIVATE just to further avoid accidentally
  // modifying the file.
  void* pages = ::mmap(
      nullptr,
      map_size,
      PROT_READ,
      MAP_PRIVATE,
      fd_,
      static_cast<off_t>(range.start));
  // Report the size that was actually handed to mmap() (map_size, which may
  // have been clamped above) rather than the unclamped range.size, and print
  // it with the unsigned size_t specifier.
  ET_CHECK_OR_RETURN_ERROR(
      pages != MAP_FAILED,
      AccessFailed,
      "Failed to map %s: mmap(..., size=%zu, ..., fd=%d, offset=0x%zx)",
      file_name_,
      map_size,
      fd_,
      range.start);

  // The mapping starts on a page boundary at or before `offset`; this is how
  // far into the mapping the requested bytes begin.
  const size_t map_delta = offset - range.start;

  // Copy data into caller's buffer. The mapping is read-only, so read it
  // through a pointer-to-const.
  std::memcpy(buffer, static_cast<const uint8_t*>(pages) + map_delta, size);

  // Unmap mapped region. The return value is not checked: the copy has
  // already completed by this point.
  ::munmap(pages, map_size);

  return Error::Ok;
}
345+
270346
} // namespace extension
271347
} // namespace executorch

extension/data_loader/mmap_data_loader.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,13 @@ class MmapDataLoader final : public executorch::runtime::DataLoader {
9595

9696
ET_NODISCARD executorch::runtime::Result<size_t> size() const override;
9797

98+
/**
 * Copies `size` bytes of the file, starting at byte `offset`, directly into
 * the caller-provided `buffer`.
 *
 * @param[in] offset Byte offset into the file at which to start reading.
 * @param[in] size Number of bytes to copy; zero copies nothing.
 * @param[in] segment_info Unused by this loader.
 * @param[out] buffer Destination; must be non-null. The implementation copies
 *     `size` bytes with no capacity check, so the caller must provide at
 *     least that much space.
 *
 * @returns Error::Ok on success, or a non-Ok Error if `buffer` is null, the
 *     requested range fails validation, or the file could not be mapped.
 */
ET_NODISCARD
99+
executorch::runtime::Error load_into(
100+
size_t offset,
101+
size_t size,
102+
ET_UNUSED const SegmentInfo& segment_info,
103+
void* buffer) const override;
104+
98105
private:
99106
MmapDataLoader(
100107
int fd,
@@ -113,6 +120,10 @@ class MmapDataLoader final : public executorch::runtime::DataLoader {
113120
MmapDataLoader& operator=(const MmapDataLoader&) = delete;
114121
MmapDataLoader& operator=(MmapDataLoader&&) = delete;
115122

123+
/**
 * Validates that the file read range described by `offset` and `size` is
 * within bounds. Shared by load() and load_into().
 *
 * @param[in] offset Byte offset into the file where the read would start.
 * @param[in] size Number of bytes the read would cover.
 *
 * @returns Error::Ok if every check passes; otherwise the Error of the first
 *     check that failed.
 */
ET_NODISCARD executorch::runtime::Error validate_input(
124+
size_t offset,
125+
size_t size) const;
126+
116127
const char* const file_name_; // String data is owned by the instance.
117128
const size_t file_size_;
118129
const size_t page_size_;

extension/data_loader/test/mmap_data_loader_test.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,3 +376,56 @@ TEST_F(MmapDataLoaderTest, DEPRECATEDFrom) {
376376
ASSERT_EQ(total_size.error(), Error::Ok);
377377
EXPECT_EQ(*total_size, contents_size);
378378
}
379+
380+
// Tests that load_into copies bytes correctly.
381+
TEST_F(MmapDataLoaderTest, LoadIntoCopiesCorrectly) {
382+
// Create a test string.
383+
const char* test_text = "FILE_CONTENTS";
384+
const size_t text_size = std::strlen(test_text);
385+
TempFile tf(test_text);
386+
387+
// Wrap it in a loader.
388+
Result<MmapDataLoader> mdl = MmapDataLoader::from(tf.path().c_str());
389+
ASSERT_EQ(mdl.error(), Error::Ok);
390+
391+
// Destination buffer.
392+
std::vector<uint8_t> dst(text_size);
393+
394+
// Call load_into()
395+
Error err = mdl->load_into(
396+
/*offset=*/0,
397+
/*size=*/text_size,
398+
DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program),
399+
dst.data());
400+
ASSERT_EQ(err, Error::Ok);
401+
402+
// Verify memory copied correctly.
403+
EXPECT_EQ(0, std::memcmp(dst.data(), test_text, text_size));
404+
}
405+
406+
// Tests that load_into copies offset slice correctly.
407+
TEST_F(MmapDataLoaderTest, LoadIntoCopiesOffsetCorrectly) {
408+
// Create a test string.
409+
const char* contents = "ABCDEFGH";
410+
TempFile tf(contents);
411+
412+
// Wrap it in a loader.
413+
Result<MmapDataLoader> mdl = MmapDataLoader::from(tf.path().c_str());
414+
ASSERT_EQ(mdl.error(), Error::Ok);
415+
416+
// Copying 3 bytes starting at offset 2 = "CDE"
417+
const size_t offset = 2;
418+
const size_t size = 3;
419+
uint8_t dst[size];
420+
421+
// Call load_into()
422+
Error err = mdl->load_into(
423+
offset,
424+
size,
425+
DataLoader::SegmentInfo(DataLoader::SegmentInfo::Type::Program),
426+
dst);
427+
ASSERT_EQ(err, Error::Ok);
428+
429+
// Verify memory copied correctly.
430+
EXPECT_EQ(0, std::memcmp(dst, contents + offset, size));
431+
}

0 commit comments

Comments
 (0)