15
15
#else /* BPE */
16
16
#include < executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
17
17
#endif /* ET_USE_TIKTOKEN*/
18
- #include < executorch/extension/data_loader/file_data_loader.h>
19
18
#include < executorch/extension/evalue_util/print_evalue.h>
20
19
#include < executorch/extension/runner_util/managed_tensor.h>
21
20
@@ -43,7 +42,10 @@ Runner::Runner(
43
42
const std::string& model_path,
44
43
const std::string& tokenizer_path,
45
44
const float temperature)
46
- : model_path_(model_path),
45
+ // NOTE: we observed ~2x loading performance increase on iPhone 15
46
+ // and a ~5% improvement on Galaxy S22 by switching to
47
+ // FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors.
48
+ : module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
47
49
tokenizer_path_ (tokenizer_path),
48
50
temperature_(temperature) {
49
51
ET_LOG (
@@ -54,22 +56,13 @@ Runner::Runner(
54
56
}
55
57
56
58
bool Runner::is_loaded () const {
57
- return module_ && module_ ->is_loaded () && tokenizer_ && sampler_;
59
+ return module_->is_loaded () && tokenizer_ && sampler_;
58
60
}
59
61
60
62
Error Runner::load () {
61
63
if (is_loaded ()) {
62
64
return Error::Ok;
63
65
}
64
- // NOTE: we observed ~2x loading performance increase on iPhone 15
65
- // and a ~5% improvement on Galaxy S22 by switching to
66
- // FileDataLoader instead of MmapDataLoader + UseMlockIgnoreErrors.
67
- auto data_loader_result = util::FileDataLoader::from (model_path_.c_str ());
68
- if (!data_loader_result.ok ()) {
69
- return data_loader_result.error ();
70
- }
71
- module_ = std::make_unique<Module>(
72
- std::make_unique<util::FileDataLoader>(std::move (*data_loader_result)));
73
66
ET_CHECK_OK_OR_RETURN_ERROR (module_->load_method (" forward" ));
74
67
75
68
// Read out metadata: vocab_size (expected by the model), BOS, EOS, n_BOS,
0 commit comments