File tree Expand file tree Collapse file tree 2 files changed +53
-1
lines changed
examples/models/llama/runner Expand file tree Collapse file tree 2 files changed +53
-1
lines changed Original file line number Diff line number Diff line change 17
17
#include < string>
18
18
#include < unordered_map>
19
19
20
+ #include < executorch/extension/llm/runner/runner_interface.h>
20
21
#include < executorch/extension/llm/runner/stats.h>
21
22
#include < executorch/extension/llm/runner/text_decoder_runner.h>
22
23
#include < executorch/extension/llm/runner/text_prefiller.h>
26
27
27
28
namespace example {
28
29
29
- class ET_EXPERIMENTAL Runner {
30
+ class ET_EXPERIMENTAL Runner
31
+ : public executorch::extension::llm::RunnerInterface {
30
32
public:
31
33
explicit Runner (
32
34
const std::string& model_path,
Original file line number Diff line number Diff line change
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the BSD-style license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ // An interface for LLM runners. Developers can create their own runner that
10
+ // implements their own load and generation logic to run the model.
11
+
12
+ #pragma once
13
+
14
+ #include < functional>
15
+ #include < string>
16
+
17
+ #include < executorch/extension/llm/runner/stats.h>
18
+ #include < executorch/extension/module/module.h>
19
+
20
+ namespace executorch {
21
+ namespace extension {
22
+ namespace llm {
23
+
24
/**
 * Abstract interface for LLM runners.
 *
 * Developers can create their own runner that implements its own load and
 * generation logic to run a model. Concrete runners (e.g. the llama example
 * Runner) derive from this class and override every pure-virtual member.
 *
 * Marked ET_EXPERIMENTAL: this interface may change without notice.
 */
class ET_EXPERIMENTAL RunnerInterface {
 public:
  /// Virtual destructor so deleting a derived runner through a
  /// RunnerInterface pointer is well-defined.
  virtual ~RunnerInterface() = default;

  /// Checks if the model is loaded.
  /// @return true once the model (and tokenizer) have been loaded.
  virtual bool is_loaded() const = 0;

  /// Load the model and tokenizer.
  /// @return Error::Ok on success, an error code otherwise.
  virtual ::executorch::runtime::Error load() = 0;

  /// Generate the output tokens for a given prompt.
  ///
  /// @param prompt The input prompt text.
  /// @param seq_len Sequence-length bound for generation — presumably the
  ///     maximum total (prompt + generated) token count; confirm against
  ///     concrete implementations.
  /// @param token_callback Optional; invoked with each piece of generated
  ///     text as it becomes available. Empty (default) means no callback.
  /// @param stats_callback Optional; invoked with generation Stats.
  ///     NOTE(review): looks like this fires once when generation finishes —
  ///     confirm with implementations.
  /// @param echo If true (default), presumably the prompt text is also
  ///     emitted through token_callback — TODO confirm.
  /// @param warming If true, presumably a warm-up run whose output is
  ///     discarded — TODO confirm.
  /// @return Error::Ok on success, an error code otherwise.
  ///
  /// NOTE(review): default arguments on a virtual function are statically
  /// bound to the caller's declared type (C++ Core Guidelines C.140);
  /// overriders must not redeclare different defaults.
  virtual ::executorch::runtime::Error generate(
      const std::string& prompt,
      int32_t seq_len,
      std::function<void(const std::string&)> token_callback = {},
      std::function<void(const ::executorch::extension::llm::Stats&)>
          stats_callback = {},
      bool echo = true,
      bool warming = false) = 0;

  /// Stop the generation (intended to be callable while generate() is
  /// producing tokens).
  virtual void stop() = 0;
};
47
+
48
+ } // namespace llm
49
+ } // namespace extension
50
+ } // namespace executorch
You can’t perform that action at this time.
0 commit comments