Skip to content

Commit 5a5dbc5

Browse files
committed
[LLava] Fix stats for C++ runner
Before:
I 00:00:28.414816 executorch:stats.h:84] Prompt Tokens: 616 Generated Tokens: 33
I 00:00:28.414826 executorch:stats.h:90] Model Load Time: 9.244000 (seconds)
I 00:00:28.414835 executorch:stats.h:100] Total inference time: 0.000000 (seconds) Rate: inf (tokens/second)
I 00:00:28.414838 executorch:stats.h:108] Prompt evaluation: 0.000000 (seconds) Rate: inf (tokens/second)
I 00:00:28.414839 executorch:stats.h:119] Generated 33 tokens: 0.000000 (seconds) Rate: inf (tokens/second)
I 00:00:28.414841 executorch:stats.h:127] Time to first generated token: 0.000000 (seconds)
I 00:00:28.414842 executorch:stats.h:134] Sampling time over 649 tokens: 0.002000 (seconds)

With real image on M1:
I 00:00:34.231017 executorch:stats.h:84] Prompt Tokens: 616 Generated Tokens: 33
I 00:00:34.231028 executorch:stats.h:90] Model Load Time: 9.108000 (seconds)
I 00:00:34.231038 executorch:stats.h:100] Total inference time: 25.103000 (seconds) Rate: 1.314584 (tokens/second)
I 00:00:34.231040 executorch:stats.h:108] Prompt evaluation: 11.544000 (seconds) Rate: 53.361053 (tokens/second)
I 00:00:34.231042 executorch:stats.h:119] Generated 33 tokens: 13.559000 (seconds) Rate: 2.433808 (tokens/second)
I 00:00:34.231043 executorch:stats.h:127] Time to first generated token: 11.544000 (seconds)
I 00:00:34.231045 executorch:stats.h:134] Sampling time over 649 tokens: 0.000000 (seconds)

With bogus image (same dims) on Android S23:
I 00:00:34.649120 executorch:stats.h:84] Prompt Tokens: 616 Generated Tokens: 33
I 00:00:34.649128 executorch:stats.h:90] Model Load Time: 12.337000 (seconds)
I 00:00:34.649169 executorch:stats.h:100] Total inference time: 22.301000 (seconds) Rate: 1.479754 (tokens/second)
I 00:00:34.649174 executorch:stats.h:108] Prompt evaluation: 17.964000 (seconds) Rate: 34.290804 (tokens/second)
I 00:00:34.649179 executorch:stats.h:119] Generated 33 tokens: 4.337000 (seconds) Rate: 7.608946 (tokens/second)
I 00:00:34.649183 executorch:stats.h:127] Time to first generated token: 17.964000 (seconds)
I 00:00:34.649186 executorch:stats.h:134] Sampling time over 649 tokens: 0.001000 (seconds)
1 parent 549f14b commit 5a5dbc5

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ if hash nproc &> /dev/null; then NPROC=$(nproc); fi
3333
EXECUTORCH_COMMON_CMAKE_ARGS=" \
3434
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
3535
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
36+
-DEXECUTORCH_ENABLE_LOGGING=ON \
3637
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
3738
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
3839
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \

examples/models/llava/runner/llava_runner.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ Error LlavaRunner::generate_from_pos(
108108

109109
uint64_t prefill_next_token =
110110
ET_UNWRAP(prefill_prompt(prompt, start_pos, /*bos=*/0, /*eos*/ 0));
111+
stats_.first_token_ms = util::time_in_ms();
112+
stats_.prompt_eval_end_ms = util::time_in_ms();
111113
stats_.num_prompt_tokens = start_pos;
112114

113115
// Generate tokens
@@ -116,7 +118,6 @@ Error LlavaRunner::generate_from_pos(
116118

117119
// Bookkeeping
118120
stats_.num_generated_tokens = num_generated_tokens;
119-
::executorch::llm::print_report(stats_);
120121
if (stats_callback) {
121122
stats_callback(stats_);
122123
}
@@ -151,6 +152,7 @@ Error LlavaRunner::generate(
151152
};
152153

153154
int64_t pos = 0;
155+
stats_.inference_start_ms = util::time_in_ms();
154156

155157
// prefill preset prompt
156158
prefill_prompt(kPresetPrompt, pos, /*bos=*/1, /*eos*/ 0);
@@ -167,6 +169,9 @@ Error LlavaRunner::generate(
167169
Error err = generate_from_pos(
168170
prompt, seq_len, pos, wrapped_callback, stats_callback, echo);
169171

172+
stats_.inference_end_ms = util::time_in_ms();
173+
::executorch::llm::print_report(stats_);
174+
170175
ET_LOG(
171176
Info,
172177
"RSS after finishing text generation: %f MiB (0 if unsupported)",

0 commit comments

Comments
 (0)