Skip to content

Commit 2551718

Browse files
committed
[Llama] Dump RSS info for Linux
1 parent 5f4a811 commit 2551718

File tree

3 files changed

+52
-0
lines changed

3 files changed

+52
-0
lines changed

examples/models/llama2/runner/runner.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ Error Runner::generate(
153153
stats_.model_load_end_ms = util::time_in_ms();
154154
}
155155

156+
ET_LOG(
157+
Info,
158+
"RSS after loading model: %f MiB (0 if unsupported)",
159+
util::get_rss_bytes() / 1024.0 / 1024.0);
160+
156161
// Wrap the token_callback with print function
157162
std::function<void(const std::string&)> wrapped_callback =
158163
[token_callback](const std::string& piece) {
@@ -213,6 +218,10 @@ Error Runner::generate(
213218

214219
// print the first token from prefill. No prev_token so use cur_token for it.
215220
wrapped_callback(ET_UNWRAP(tokenizer_->decode(cur_token, cur_token)));
221+
ET_LOG(
222+
Info,
223+
"RSS after prompt prefill: %f MiB (0 if unsupported)",
224+
util::get_rss_bytes() / 1024.0 / 1024.0);
216225

217226
// start the main loop
218227
prompt_tokens.push_back(cur_token);
@@ -221,6 +230,10 @@ Error Runner::generate(
221230

222231
stats_.inference_end_ms = util::time_in_ms();
223232
printf("\n");
233+
ET_LOG(
234+
Info,
235+
"RSS after finishing text generation: %f MiB (0 if unsupported)",
236+
util::get_rss_bytes() / 1024.0 / 1024.0);
224237

225238
if (num_prompt_tokens + num_generated_tokens == seq_len) {
226239
ET_LOG(Info, "Sequence length (%i tokens) reached!", seq_len);

examples/models/llava/runner/llava_runner.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ Error LlavaRunner::load() {
6969
&stats_);
7070

7171
stats_.model_load_end_ms = util::time_in_ms();
72+
ET_LOG(
73+
Info,
74+
"RSS after loading model: %f MiB (0 if unsupported)",
75+
util::get_rss_bytes() / 1024.0 / 1024.0);
7276
return Error::Ok;
7377
}
7478

@@ -120,10 +124,20 @@ Error LlavaRunner::generate(
120124
ET_UNWRAP(text_prefiller_->prefill(user_prompt_tokens, pos));
121125
pos += num_user_tokens;
122126

127+
ET_LOG(
128+
Info,
129+
"RSS after prompt and image prefill: %f MiB (0 if unsupported)",
130+
util::get_rss_bytes() / 1024.0 / 1024.0);
131+
123132
// Generate tokens
124133
int64_t num_generated_tokens = ET_UNWRAP(text_token_generator_->generate(
125134
{prefill_next_token}, pos, seq_len, wrapped_callback));
126135

136+
ET_LOG(
137+
Info,
138+
"RSS after finishing text generation: %f MiB (0 if unsupported)",
139+
util::get_rss_bytes() / 1024.0 / 1024.0);
140+
127141
// Bookkeeping
128142
stats_.num_prompt_tokens = num_preset_tokens + num_user_tokens;
129143
stats_.num_generated_tokens = num_generated_tokens;

extension/llm/runner/util.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
#include <stdio.h>
1111
#include <time.h>
1212
#include <cctype>
13+
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
14+
#include <sys/resource.h>
15+
#endif
1316

1417
namespace executorch {
1518
namespace extension {
@@ -44,6 +47,27 @@ long inline time_in_ms() {
4447
return time.tv_sec * 1000 + time.tv_nsec / 1000000;
4548
}
4649

50+
// ----------------------------------------------------------------------------
51+
// utilities: memory usage
52+
53+
// Returns the peak resident set size (RSS) of this process in bytes, or 0 if
// the value is unavailable.
//
// RSS is the amount of this process's memory that is resident in RAM. The
// number is taken from getrusage()'s `ru_maxrss` field, which is the maximum
// RSS observed so far (a high-water mark) rather than the instantaneous RSS,
// so successive calls never report a smaller value. The result is approximate
// and is intended for logging only.
//
// Returns 0 when:
//   - the platform is not covered by the preprocessor guard below (e.g.
//     Windows),
//   - getrusage() fails, or
//   - getrusage() reports a non-positive `ru_maxrss`.
//
// __APPLE__ and __MACH__ are deliberately not supported: `ru_maxrss` has not
// consistently been reported in kilobytes on macOS (older versions are said to
// return bytes, newer ones kilobytes), and it is not yet known when that
// changed.
size_t inline get_rss_bytes() {
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
  struct rusage r_usage;
  if (getrusage(RUSAGE_SELF, &r_usage) == 0 && r_usage.ru_maxrss > 0) {
    // `ru_maxrss` is a `long` counted in kilobytes. Convert to size_t before
    // scaling so the multiplication is done in unsigned arithmetic; doing it
    // in `long` is signed overflow on targets where `long` is 32 bits once the
    // peak reaches 2 GiB.
    return static_cast<size_t>(r_usage.ru_maxrss) * 1024;
  }
#endif // __linux__ || __ANDROID__ || __unix__
  // Unsupported platform, getrusage() failure, or no usable reading.
  return 0;
}
4771
} // namespace llm
4872
} // namespace extension
4973
} // namespace executorch
@@ -53,6 +77,7 @@ namespace executor {
5377
namespace util {
5478
// TODO(T197294990): Remove these deprecated aliases once all users have moved
5579
// to the new `::executorch` namespaces.
80+
using ::executorch::extension::llm::get_rss_bytes;
5681
using ::executorch::extension::llm::safe_printf;
5782
using ::executorch::extension::llm::time_in_ms;
5883
} // namespace util

0 commit comments

Comments
 (0)