4 files changed: 63 additions, 1 deletion

examples/models/llama/runner (example Runner header)

 #include <string>
 #include <unordered_map>
+#include <executorch/extension/llm/runner/irunner.h>
 #include <executorch/extension/llm/runner/stats.h>
 #include <executorch/extension/llm/runner/text_decoder_runner.h>
 #include <executorch/extension/llm/runner/text_prefiller.h>

 namespace example {

-class ET_EXPERIMENTAL Runner {
+class ET_EXPERIMENTAL Runner : public executorch::extension::llm::IRunner {
  public:
   explicit Runner(
       const std::string& model_path,
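
Since the example Runner now derives from IRunner, call sites can be written against the interface alone. Below is a minimal sketch of such a call site, assuming only the irunner.h introduced in this diff; the run_prompt helper, its parameters, and the seq_len value are illustrative and not part of the change.

#include <iostream>
#include <string>

#include <executorch/extension/llm/runner/irunner.h>

namespace {

using executorch::extension::llm::IRunner;
using executorch::extension::llm::Stats;
using executorch::runtime::Error;

// Drives any IRunner implementation (including the llama example Runner
// after this change) without knowing the concrete type.
Error run_prompt(IRunner& runner, const std::string& prompt) {
  if (!runner.is_loaded()) {
    Error err = runner.load();
    if (err != Error::Ok) {
      return err;
    }
  }
  return runner.generate(
      prompt,
      /*seq_len=*/128,
      /*token_callback=*/[](const std::string& piece) { std::cout << piece; },
      /*stats_callback=*/[](const Stats&) { /* inspect timing stats here */ });
}

} // namespace

Coding against IRunner rather than the concrete Runner lets applications swap in alternative runners (or test doubles) without changing call sites.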
examples/models/llama/runner (build targets)

@@ -39,6 +39,7 @@ def define_common_targets():
         ],
         exported_deps = [
             "//executorch/backends/xnnpack:xnnpack_backend",
+            "//executorch/extension/llm/runner:irunner",
             "//executorch/extension/llm/runner:stats",
             "//executorch/extension/llm/runner:text_decoder_runner" + aten_suffix,
             "//executorch/extension/llm/runner:text_prefiller" + aten_suffix,
New file: irunner.h (exported by //executorch/extension/llm/runner:irunner)

+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// An interface for LLM runners. Developers can create their own runner that
+// implements their own load and generation logic to run the model.
+
+#pragma once
+
+#include <functional>
+#include <string>
+
+#include <executorch/extension/llm/runner/stats.h>
+#include <executorch/extension/module/module.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+class ET_EXPERIMENTAL IRunner {
+ public:
+  virtual ~IRunner() = default;
+
+  // Checks if the model is loaded.
+  virtual bool is_loaded() const = 0;
+
+  // Load the model and tokenizer.
+  virtual ::executorch::runtime::Error load() = 0;
+
+  // Generate the output tokens.
+  virtual ::executorch::runtime::Error generate(
+      const std::string& prompt,
+      int32_t seq_len,
+      std::function<void(const std::string&)> token_callback = {},
+      std::function<void(const ::executorch::extension::llm::Stats&)>
+          stats_callback = {},
+      bool echo = true,
+      bool warming = false) = 0;
+
+  // Stop the generation.
+  virtual void stop() = 0;
+};
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
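
The header's comment invites developers to supply their own runner. Here is a minimal sketch of a custom implementation, assuming only the interface above: EchoRunner, its namespace, and its trivial "generation" (streaming the prompt back) are hypothetical placeholders for real model loading and decoding logic.

#include <functional>
#include <string>

#include <executorch/extension/llm/runner/irunner.h>

namespace my_app { // hypothetical namespace, not part of the change

class EchoRunner : public executorch::extension::llm::IRunner {
 public:
  bool is_loaded() const override {
    return loaded_;
  }

  executorch::runtime::Error load() override {
    // A real runner would load the model and tokenizer here.
    loaded_ = true;
    return executorch::runtime::Error::Ok;
  }

  executorch::runtime::Error generate(
      const std::string& prompt,
      int32_t seq_len,
      std::function<void(const std::string&)> token_callback,
      std::function<void(const ::executorch::extension::llm::Stats&)>
          stats_callback,
      bool echo,
      bool warming) override {
    (void)seq_len;
    (void)warming;
    // Placeholder "generation": stream the prompt back through the callback.
    if (echo && token_callback) {
      token_callback(prompt);
    }
    if (stats_callback) {
      stats_callback(stats_);
    }
    return executorch::runtime::Error::Ok;
  }

  void stop() override {
    // A real runner would signal its decode loop to exit early.
  }

 private:
  bool loaded_ = false;
  ::executorch::extension::llm::Stats stats_;
};

} // namespace my_app

A client like this only needs to depend on the //executorch/extension/llm/runner:irunner target defined in the next file.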
Build targets for //executorch/extension/llm/runner

 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

 def define_common_targets():
+    runtime.cxx_library(
+        name = "irunner",
+        exported_headers = [
+            "irunner.h",
+        ],
+        visibility = [
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
     runtime.cxx_library(
         name = "stats",
         exported_headers = [