Skip to content
This repository was archived by the owner on Oct 23, 2023. It is now read-only.

Commit 48eb26c

Browse files
committed
Limited the amount of data read by getContentAsString
1 parent 819a51e commit 48eb26c

File tree

8 files changed

+162
-96
lines changed

8 files changed

+162
-96
lines changed

capnp/server.capnp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ interface FrontendContext {
7171
# The following methods should only be called after the computational
7272
# DAG is fully defined.
7373
startEvaluation @3 (sender :FileSender);
74-
getFileContents @4 (file :File, receiver :FileReceiver);
74+
getFileContents @4 (file :File, receiver :FileReceiver,
75+
amount :UInt64 = 0xffffffffffffffff);
7576
stopEvaluation @5 ();
7677
}
7778

cpp/frontend/frontend.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ class FileProvider : public capnproto::FileSender::Server {
7878

7979
kj::Promise<void> requestFile(RequestFileContext context) override {
8080
util::FileWrapper* file = &known_files_.at(context.getParams().getHash());
81-
return util::File::HandleRequestFile(file,
82-
context.getParams().getReceiver());
81+
return util::File::HandleRequestFile(
82+
file, context.getParams().getReceiver(), 0xffffffffffffffff);
8383
}
8484

8585
private:
@@ -90,12 +90,12 @@ class FileProvider : public capnproto::FileSender::Server {
9090
} // namespace
9191

9292
void File::getContentsAsString(
93-
const std::function<void(const std::string&)>& callback) {
93+
const std::function<void(const std::string&)>& callback, uint64_t limit) {
9494
auto pf = kj::newPromiseAndFulfiller<void>();
9595
frontend_.builder_.AddPromise(std::move(pf.promise));
9696
frontend_.finish_builder_.AddPromise(
9797
forked_promise.addBranch().then(
98-
[this, callback,
98+
[this, callback, limit,
9999
fulfiller = std::move(pf.fulfiller)](auto file) mutable {
100100
auto req = frontend_.frontend_context_.getFileContentsRequest();
101101
auto output = kj::heap<std::string>();
@@ -105,6 +105,7 @@ void File::getContentsAsString(
105105
[output = std::move(output)](util::File::Chunk data) mutable {
106106
*output += std::string(data.asChars().begin(), data.size());
107107
}));
108+
req.setAmount(limit);
108109
kj::Promise<void> ret = req.send().ignoreResult().then(
109110
[output_ptr, callback]() { callback(*output_ptr); });
110111
fulfiller->fulfill();

cpp/frontend/frontend.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ class File {
9999
// Call the provided callback with the contents of the file as soon as they
100100
// are available.
101101
void getContentsAsString(
102-
const std::function<void(const std::string&)>& callback);
102+
const std::function<void(const std::string&)>& callback,
103+
uint64_t limit = 0xffffffffffffffff);
103104

104105
// Write the file to path when it is available.
105106
void getContentsToFile(const std::string& path, bool overwrite,
@@ -202,8 +203,8 @@ class Frontend {
202203

203204
// Defines a file that is provided by the frontend, loading it from its
204205
// content.
205-
File *provideFileContent(const std::string &content,
206-
const std::string &description, bool is_executable);
206+
File* provideFileContent(const std::string& content,
207+
const std::string& description, bool is_executable);
207208

208209
// Creates a new execution with the given description.
209210
Execution* addExecution(const std::string& description);

cpp/frontend/python/frontend.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ PYBIND11_MODULE(task_maker_frontend, m) {
104104

105105
pybind11::class_<frontend::File>(m, "File")
106106
.def("getContentsAsString",
107-
[](frontend::File& f, std::function<void(std::string)> cb) {
107+
[](frontend::File& f, std::function<void(std::string)> cb,
108+
uint64_t limit = 0xffffffffffffffff) {
108109
f.getContentsAsString(
109110
[cb = destroy_with_gil(cb)](std::string s) mutable {
110111
pybind11::gil_scoped_acquire acquire;
@@ -115,23 +116,27 @@ PYBIND11_MODULE(task_maker_frontend, m) {
115116
<< exc.what() << std::endl;
116117
_Exit(1);
117118
}
118-
});
119+
},
120+
limit);
119121
},
120-
"callback"_a)
122+
"callback"_a, "limit"_a)
121123
.def("getContentsAsBytes",
122-
[](frontend::File &f, std::function<void(pybind11::bytes)> cb) {
124+
[](frontend::File& f, std::function<void(pybind11::bytes)> cb,
125+
uint64_t limit = 0xffffffffffffffff) {
123126
f.getContentsAsString(
124127
[cb = destroy_with_gil(cb)](std::string s) mutable {
125128
pybind11::gil_scoped_acquire acquire;
126129
try {
127130
(*cb)(s);
128-
} catch (pybind11::error_already_set &exc) {
131+
} catch (pybind11::error_already_set& exc) {
129132
std::cerr << __FILE__ << ":" << __LINE__ << " "
130133
<< exc.what() << std::endl;
131134
_Exit(1);
132135
}
133-
});
134-
})
136+
},
137+
limit);
138+
},
139+
"callback"_a, "limit"_a)
135140
.def("getContentsToFile", &frontend::File::getContentsToFile, "path"_a,
136141
"overwrite"_a = true, "exist_ok"_a = true);
137142

cpp/server/server.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,8 @@ kj::Promise<void> FrontendContext::getFileContents(
555555
KJ_LOG(INFO, "Sending file with id " + std::to_string(id), hash.Hex());
556556
auto ff = fulfiller.get();
557557
return util::File::HandleRequestFile(hash,
558-
context.getParams().getReceiver())
558+
context.getParams().getReceiver(),
559+
context.getParams().getAmount())
559560
.then(
560561
[id, fulfiller = std::move(fulfiller)]() mutable {
561562
fulfiller->fulfill();

cpp/util/file.cpp

Lines changed: 62 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -140,18 +140,22 @@ int OsAtomicCopy(const std::string& src, const std::string& dst,
140140
return 0;
141141
}
142142

143-
util::File::ChunkProducer OsRead(const std::string& path) {
143+
util::File::ChunkProducer OsRead(const std::string& path, uint64_t limit) {
144144
kj::AutoCloseFd fd{open(path.c_str(), O_CLOEXEC | O_RDONLY)}; // NOLINT
145145
if (fd.get() == -1) {
146146
throw std::system_error(errno, std::system_category(), "Read " + path);
147147
}
148-
return [fd = std::move(fd), path,
148+
std::unique_ptr<size_t> alreadyRead = std::make_unique<size_t>(0);
149+
return [fd = std::move(fd), path, alreadyRead = std::move(alreadyRead), limit,
149150
buf = std::array<kj::byte, util::kChunkSize>()]() mutable {
150151
if (fd.get() == -1) return util::File::Chunk();
151152
ssize_t amount;
152-
while ((amount = read(fd, buf.data(), util::kChunkSize))) { // NOLINT
153+
size_t toRead = util::kChunkSize;
154+
if (*alreadyRead + toRead > limit) toRead = limit - *alreadyRead;
155+
while ((amount = read(fd, buf.data(), toRead))) { // NOLINT
153156
if (amount == -1 && errno == EINTR) continue;
154157
if (amount == -1) break;
158+
*alreadyRead += amount;
155159
return util::File::Chunk(buf.data(), amount);
156160
}
157161
if (amount == -1) {
@@ -219,7 +223,9 @@ std::vector<std::string> File::ListFiles(const std::string& path) {
219223
return OsListFiles(path);
220224
}
221225

222-
File::ChunkProducer File::Read(const std::string& path) { return OsRead(path); }
226+
File::ChunkProducer File::Read(const std::string& path, uint64_t limit) {
227+
return OsRead(path, limit);
228+
}
223229
File::ChunkReceiver File::Write(const std::string& path, bool overwrite,
224230
bool exist_ok) {
225231
MakeDirs(BaseDir(path));
@@ -403,68 +409,76 @@ kj::Promise<void> next_chunk(HandleRequestFileData data) {
403409
} // namespace
404410

405411
kj::Promise<void> File::HandleRequestFile(
406-
FileWrapper *wrapper, capnproto::FileReceiver::Client receiver) {
412+
FileWrapper* wrapper, capnproto::FileReceiver::Client receiver,
413+
uint64_t amount) {
407414
// TODO: see if we can easily avoid the extra round-trips while
408415
// still guaranteeing in-order processing (when capnp implements streams?)
409416
// Possibly by using UnionPromiseBuilder?
410417
if (HandleRequestFileData::num_concurrent <
411418
HandleRequestFileData::max_concurrent) {
412419
HandleRequestFileData::num_concurrent++;
413-
HandleRequestFileData data{wrapper->Read(), receiver};
420+
HandleRequestFileData data{wrapper->Read(amount), receiver};
414421
return next_chunk(std::move(data));
415422
}
416423
auto pf = kj::newPromiseAndFulfiller<void>();
417424
HandleRequestFileData::waiting.push(std::move(pf.fulfiller));
418-
return pf.promise.then([wrapper = wrapper, receiver]() mutable {
419-
return File::HandleRequestFile(wrapper, receiver);
425+
return pf.promise.then([wrapper = wrapper, receiver, amount]() mutable {
426+
return File::HandleRequestFile(wrapper, receiver, amount);
420427
});
421428
}
422429

423-
kj::Promise<void> File::HandleRequestFile(
424-
const util::SHA256_t &hash, capnproto::FileReceiver::Client receiver) {
425-
if (hash.hasContents()) {
426-
auto req = receiver.sendChunkRequest();
427-
req.setChunk(hash.getContents());
428-
return req.send().ignoreResult().then([receiver]() mutable {
429-
return receiver.sendChunkRequest().send().ignoreResult();
430-
});
431-
}
432-
util::FileWrapper wrapper = FileWrapper::FromPath(PathForHash(hash));
433-
return HandleRequestFile(&wrapper, receiver);
430+
kj::Promise<void> File::HandleRequestFile(
431+
const util::SHA256_t& hash, capnproto::FileReceiver::Client receiver,
432+
uint64_t amount) {
433+
if (hash.hasContents()) {
434+
auto req = receiver.sendChunkRequest();
435+
kj::ArrayPtr<const uint8_t> chunk = hash.getContents();
436+
if (chunk.size() > amount) chunk = {chunk.begin(), amount};
437+
req.setChunk(chunk);
438+
return req.send().ignoreResult().then([receiver]() mutable {
439+
return receiver.sendChunkRequest().send().ignoreResult();
440+
});
434441
}
442+
util::FileWrapper wrapper = FileWrapper::FromPath(PathForHash(hash));
443+
return HandleRequestFile(&wrapper, receiver, amount);
444+
}
435445

436-
FileWrapper FileWrapper::FromPath(std::string path) {
437-
FileWrapper file;
438-
file.type_ = FileWrapper::FileWrapperType::PATH;
439-
file.path_ = std::move(path);
440-
return file;
441-
}
446+
FileWrapper FileWrapper::FromPath(std::string path) {
447+
FileWrapper file;
448+
file.type_ = FileWrapper::FileWrapperType::PATH;
449+
file.path_ = std::move(path);
450+
return file;
451+
}
442452

443-
FileWrapper FileWrapper::FromContent(std::string content) {
444-
FileWrapper file;
445-
file.type_ = FileWrapper::FileWrapperType::CONTENT;
446-
file.content_ = std::move(content);
447-
return file;
453+
FileWrapper FileWrapper::FromContent(std::string content) {
454+
FileWrapper file;
455+
file.type_ = FileWrapper::FileWrapperType::CONTENT;
456+
file.content_ = std::move(content);
457+
return file;
458+
}
459+
460+
File::ChunkProducer FileWrapper::Read(uint64_t limit) {
461+
if (type_ == FileWrapper::FileWrapperType::PATH) {
462+
return File::Read(path_, limit);
448463
}
449464

450-
File::ChunkProducer FileWrapper::Read() {
451-
if (type_ == FileWrapper::FileWrapperType::PATH) return File::Read(path_);
452-
453-
std::unique_ptr<size_t> pos = std::make_unique<size_t>(0);
454-
return [this, pos = std::move(pos)]() mutable {
455-
if (*pos < content_.size()) {
456-
auto end =
457-
std::min(content_.begin() + *pos + util::kChunkSize,
458-
content_.end());
459-
size_t amount = end - content_.begin() - *pos;
460-
auto chunk = util::File::Chunk(
461-
// NOLINTNEXTLINE
462-
reinterpret_cast<const kj::byte *>(&content_[0] + *pos), amount);
463-
*pos += amount;
464-
return chunk;
465+
std::unique_ptr<size_t> pos = std::make_unique<size_t>(0);
466+
return [this, pos = std::move(pos), limit]() mutable {
467+
if (*pos < content_.size() && *pos < limit) {
468+
auto end = content_.begin() + *pos + util::kChunkSize;
469+
if (end > content_.end()) end = content_.end();
470+
if (static_cast<size_t>(end - content_.begin()) > limit) {
471+
end = content_.begin() + limit;
465472
}
466-
return util::File::Chunk();
467-
};
468-
}
473+
size_t amount = end - content_.begin() - *pos;
474+
auto chunk = util::File::Chunk(
475+
// NOLINTNEXTLINE
476+
reinterpret_cast<const kj::byte*>(&content_[0] + *pos), amount);
477+
*pos += amount;
478+
return chunk;
479+
}
480+
return util::File::Chunk();
481+
};
482+
}
469483

470484
} // namespace util

cpp/util/file.hpp

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ namespace util {
1414
static const constexpr uint32_t kChunkSize = 1024 * 1024;
1515
static const constexpr uint32_t kInlineChunkThresh = 1024;
1616

17-
class FileWrapper;
17+
class FileWrapper;
1818

1919
class File {
2020
public:
@@ -34,7 +34,8 @@ class File {
3434
static std::vector<std::string> ListFiles(const std::string& path);
3535

3636
// Reads the file specified by path in chunks.
37-
static ChunkProducer Read(const std::string& path);
37+
static ChunkProducer Read(const std::string& path,
38+
uint64_t limit = 0xffffffffffffffff);
3839

3940
// Returns a receiver that writes to the given file, the file ends when an
4041
// empty chunk is received, and finalizes the write when destroyed.
@@ -95,16 +96,18 @@ class File {
9596

9697
// Utility to implement RequestFile methods, given the path and the receiver
9798
static kj::Promise<void> HandleRequestFile(
98-
FileWrapper *wrapper, capnproto::FileReceiver::Client receiver);
99+
FileWrapper* wrapper, capnproto::FileReceiver::Client receiver,
100+
uint64_t amount);
99101
static kj::Promise<void> HandleRequestFile(
100-
const util::SHA256_t &hash, capnproto::FileReceiver::Client receiver);
102+
const util::SHA256_t& hash, capnproto::FileReceiver::Client receiver,
103+
uint64_t amount);
101104

102105
// Utility to implement RequestFile methods
103106
template <typename RequestFileContext>
104107
static kj::Promise<void> HandleRequestFile(RequestFileContext context) {
105108
auto hash = context.getParams().getHash();
106109
auto receiver = context.getParams().getReceiver();
107-
return HandleRequestFile(hash, receiver);
110+
return HandleRequestFile(hash, receiver, 0xffffffffffffffff);
108111
}
109112

110113
// Simple capnproto server implementation to receive a file
@@ -185,40 +188,38 @@ class TempDir {
185188
};
186189

187190
// Wrapper around a File, it can be a path to a file or the content of a file.
188-
class FileWrapper {
189-
enum class FileWrapperType {
190-
PATH = 0, CONTENT = 1
191-
};
191+
class FileWrapper {
192+
enum class FileWrapperType { PATH = 0, CONTENT = 1 };
192193

193-
FileWrapper() = default;
194-
KJ_DISALLOW_COPY(FileWrapper);
194+
FileWrapper() = default;
195+
KJ_DISALLOW_COPY(FileWrapper);
195196

196-
public:
197-
~FileWrapper() = default;
197+
public:
198+
~FileWrapper() = default;
198199

199-
FileWrapper(FileWrapper &&other) noexcept { *this = std::move(other); }
200+
FileWrapper(FileWrapper&& other) noexcept { *this = std::move(other); }
200201

201-
FileWrapper &operator=(FileWrapper &&other) noexcept {
202-
path_ = std::move(other.path_);
203-
content_ = std::move(other.content_);
204-
type_ = other.type_;
205-
return *this;
206-
}
202+
FileWrapper& operator=(FileWrapper&& other) noexcept {
203+
path_ = std::move(other.path_);
204+
content_ = std::move(other.content_);
205+
type_ = other.type_;
206+
return *this;
207+
}
207208

208-
// Create a wrapper from a file path
209-
static FileWrapper FromPath(std::string path);
209+
// Create a wrapper from a file path
210+
static FileWrapper FromPath(std::string path);
210211

211-
// Create a wrapper from a file content
212-
static FileWrapper FromContent(std::string content);
212+
// Create a wrapper from a file content
213+
static FileWrapper FromContent(std::string content);
213214

214-
// Returns a ChunkProducer with the content of the wrapped file
215-
File::ChunkProducer Read();
215+
// Returns a ChunkProducer with the content of the wrapped file
216+
File::ChunkProducer Read(uint64_t limit);
216217

217-
private:
218-
std::string path_;
219-
std::string content_;
220-
FileWrapperType type_{};
221-
};
218+
private:
219+
std::string path_;
220+
std::string content_;
221+
FileWrapperType type_{};
222+
};
222223

223224
} // namespace util
224225

0 commit comments

Comments
 (0)