Skip to content

Commit 8beb9f7

Browse files
shoumikhin authored and facebook-github-bot committed
Simplify Module usage in LLama runner. (#4175)
Summary: Pull Request resolved: #4175. Reviewed By: kirklandsign. Differential Revision: D59498338.
1 parent fd3f344 commit 8beb9f7

File tree

2 files changed

+5
-15
lines changed

2 files changed

+5
-15
lines changed

examples/models/llama2/runner/runner.cpp

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#else /* BPE */
1616
#include <executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
1717
#endif /* ET_USE_TIKTOKEN*/
18-
#include <executorch/extension/data_loader/file_data_loader.h>
1918
#include <executorch/extension/evalue_util/print_evalue.h>
2019
#include <executorch/extension/runner_util/managed_tensor.h>
2120

@@ -43,7 +42,10 @@ Runner::Runner(
4342
const std::string& model_path,
4443
const std::string& tokenizer_path,
4544
const float temperature)
46-
: model_path_(model_path),
45+
// NOTE: we observed ~2x loading performance increase on iPhone 15
46+
// and a ~5% improvement on Galaxy S22 by switching to
47+
// FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors.
48+
: module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
4749
tokenizer_path_(tokenizer_path),
4850
temperature_(temperature) {
4951
ET_LOG(
@@ -54,22 +56,13 @@ Runner::Runner(
5456
}
5557

5658
bool Runner::is_loaded() const {
57-
return module_ && module_->is_loaded() && tokenizer_ && sampler_;
59+
return module_->is_loaded() && tokenizer_ && sampler_;
5860
}
5961

6062
Error Runner::load() {
6163
if (is_loaded()) {
6264
return Error::Ok;
6365
}
64-
// NOTE: we observed ~2x loading performance increase on iPhone 15
65-
// and a ~5% improvement on Galaxy S22 by switching to
66-
// FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors.
67-
auto data_loader_result = util::FileDataLoader::from(model_path_.c_str());
68-
if (!data_loader_result.ok()) {
69-
return data_loader_result.error();
70-
}
71-
module_ = std::make_unique<Module>(
72-
std::make_unique<util::FileDataLoader>(std::move(*data_loader_result)));
7366
ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));
7467

7568
// Read out metadata: vocab_size (expected by the model), BOS, EOS, n_BOS,

examples/models/llama2/runner/targets.bzl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ def define_common_targets():
3131
visibility = [
3232
"@EXECUTORCH_CLIENTS",
3333
],
34-
deps = [
35-
"//executorch/extension/data_loader:file_data_loader",
36-
],
3734
exported_deps = [
3835
"//executorch/backends/xnnpack:xnnpack_backend",
3936
"//executorch/examples/models/llama2/sampler:sampler" + aten_suffix,

0 commit comments

Comments
 (0)