@@ -29,62 +29,29 @@ class ET_EXPERIMENTAL LlavaRunner
      const std::string& tokenizer_path,
      const float temperature = 0.8f)
      : MultimodalRunner(model_path, tokenizer_path, temperature){};
-  bool is_loaded();
-  ::executorch::runtime::Error load();
-  ::executorch::runtime::Error generate(
-      std::vector<::executorch::extension::llm::Image> images,
+
+  bool is_loaded() override;
+  Error load() override;
+  Error generate(
+      std::vector<Image> images,
      const std::string& prompt,
      int32_t seq_len = 1024,
      std::function<void(const std::string&)> token_callback = {},
      std::function<void(const ::executorch::extension::llm::Stats&)>
-          stats_callback = {},
-      bool echo = true);
-
-  /**
-   * Prefill an LLaVA Module with the given images input.
-   * @param images The image input to LLaVA.
-   * @param start_pos The starting position in KV cache of the input in the LLM.
-   * It's passed as reference and will be updated inside this function.
-   * @return The error status of prefilling images.
-   */
-  ::executorch::runtime::Error prefill_images(
-      std::vector<::executorch::extension::llm::Image>& images,
-      int64_t& start_pos);
-
-  /**
-   * Prefill an LLaVA Module with the given text input.
-   * @param prompt The text prompt to LLaVA.
-   * @param start_pos The starting position in KV cache of the input in the LLM.
-   * It's passed as reference and will be updated inside this function.
-   * @param bos The number of BOS (begin of sequence) token.
-   * @param eos The number of EOS (end of sequence) token.
-   * @return The generated token of the LLaVA Module after prefill prompt.
-   */
-  ::executorch::runtime::Result<uint64_t> prefill_prompt(
+          stats_callback = {}) override;
+  Error prefill_images(std::vector<Image>& images, int64_t& start_pos) override;
+  Result<uint64_t> prefill_prompt(
      const std::string& prompt,
      int64_t& start_pos,
      int8_t bos = 0,
-      int8_t eos = 0);
-
-  /**
-   * Generate tokens from the given prompt, starting from the given position.
-   * @param prompt The text prompt to LLaVA.
-   * @param seq_len The total sequence length, including the prompt tokens and
-   * new tokens.
-   * @param start_pos The starting position in KV cache of the input in the LLM.
-   * @param token_callback What to do after a token is generated.
-   * @param stats_callback What to do with Stats.
-   * @param echo Whether to echo the input prompt or not.
-   * @return The error code.
-   */
-  ::executorch::runtime::Error generate_from_pos(
+      int8_t eos = 0) override;
+  Error generate_from_pos(
      const std::string& prompt,
      int32_t seq_len = 1024,
      int64_t start_pos = 0,
      std::function<void(const std::string&)> token_callback = {},
      std::function<void(const ::executorch::extension::llm::Stats&)>
-          stats_callback = {},
-      bool echo = true);
+          stats_callback = {}) override;

 private:
  inline static const std::string kPresetPrompt =
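For context, here is a minimal usage sketch of the API as declared in this hunk. The include path and the `example` namespace are assumptions (not shown in the diff), and the model/tokenizer paths and image contents are placeholders; only the method signatures come from the header above.

```cpp
// Minimal sketch, assuming this header path and the `example` namespace.
#include <executorch/examples/models/llava/runner/llava_runner.h>

#include <iostream>
#include <string>
#include <utility>
#include <vector>

using ::executorch::extension::llm::Image;
using ::executorch::runtime::Error;

int main() {
  // Placeholder artifact paths.
  example::LlavaRunner runner(
      "llava.pte", "tokenizer.bin", /*temperature=*/0.8f);
  if (runner.load() != Error::Ok) {
    std::cerr << "failed to load model\n";
    return 1;
  }

  std::vector<Image> images;  // fill with decoded image(s) for the model

  // Signature per the diff: images, prompt, seq_len, token_callback,
  // stats_callback. Note the former `echo` parameter is gone.
  Error err = runner.generate(
      std::move(images),
      "What is in this image?",
      /*seq_len=*/1024,
      [](const std::string& piece) { std::cout << piece << std::flush; });
  return err == Error::Ok ? 0 : 1;
}
```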