Skip to content

Commit d855333

Browse files
committed
[Llama] Dump RSS info for Linux
1 parent c83fd2e commit d855333

File tree

3 files changed

+57
-2
lines changed

3 files changed

+57
-2
lines changed

examples/models/llama2/runner/runner.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ Error Runner::generate(
153153
stats_.model_load_end_ms = util::time_in_ms();
154154
}
155155

156+
ET_LOG(
157+
Info,
158+
"RSS after loading model: %f MiB (0 if unsupported)",
159+
util::get_rss_bytes() / 1024.0 / 1024.0);
160+
156161
// Wrap the token_callback with print function
157162
std::function<void(const std::string&)> wrapped_callback =
158163
[token_callback](const std::string& piece) {
@@ -213,6 +218,10 @@ Error Runner::generate(
213218

214219
// print the first token from prefill. No prev_token so use cur_token for it.
215220
wrapped_callback(ET_UNWRAP(tokenizer_->decode(cur_token, cur_token)));
221+
ET_LOG(
222+
Info,
223+
"RSS after prompt prefill: %f MiB (0 if unsupported)",
224+
util::get_rss_bytes() / 1024.0 / 1024.0);
216225

217226
// start the main loop
218227
prompt_tokens.push_back(cur_token);
@@ -221,6 +230,10 @@ Error Runner::generate(
221230

222231
stats_.inference_end_ms = util::time_in_ms();
223232
printf("\n");
233+
ET_LOG(
234+
Info,
235+
"RSS after finishing text generation: %f MiB (0 if unsupported)",
236+
util::get_rss_bytes() / 1024.0 / 1024.0);
224237

225238
if (num_prompt_tokens + num_generated_tokens == seq_len) {
226239
ET_LOG(Info, "Sequence length (%i tokens) reached!", seq_len);

examples/models/llava/runner/llava_runner.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,11 @@ Error LlavaRunner::generate(
131131
ET_CHECK_OK_OR_RETURN_ERROR(load());
132132
}
133133

134+
ET_LOG(
135+
Info,
136+
"RSS after loading model: %f MiB (0 if unsupported)",
137+
util::get_rss_bytes() / 1024.0 / 1024.0);
138+
134139
// Wrap the token_callback with print function
135140
std::function<void(const std::string&)> wrapped_callback =
136141
[token_callback](const std::string& piece) {
@@ -149,9 +154,21 @@ Error LlavaRunner::generate(
149154
// prefill images
150155
prefill_images(images, pos);
151156

157+
ET_LOG(
158+
Info,
159+
"RSS after prompt and image prefill: %f MiB (0 if unsupported)",
160+
util::get_rss_bytes() / 1024.0 / 1024.0);
161+
152162
// Generate tokens
153-
return generate_from_pos(
154-
prompt, seq_len, pos, wrapped_callback, stats_callback);
163+
Error err =
164+
generate_from_pos(prompt, seq_len, pos, wrapped_callback, stats_callback);
165+
166+
ET_LOG(
167+
Info,
168+
"RSS after finishing text generation: %f MiB (0 if unsupported)",
169+
util::get_rss_bytes() / 1024.0 / 1024.0);
170+
171+
return err;
155172
}
156173

157174
} // namespace torch::executor

extension/llm/runner/util.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
#include <stdio.h>
1111
#include <time.h>
1212
#include <cctype>
13+
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
14+
#include <sys/resource.h>
15+
#endif
1316

1417
namespace executorch {
1518
namespace extension {
@@ -44,6 +47,27 @@ long inline time_in_ms() {
4447
return time.tv_sec * 1000 + time.tv_nsec / 1000000;
4548
}
4649

50+
// ----------------------------------------------------------------------------
51+
// utilities: memory usage
52+
53+
// Returns the peak RSS of this process in bytes. Returns 0 if not supported.
// RSS: Resident Set Size, the amount of memory held in RAM for this process.
// The value is taken from getrusage()'s ru_maxrss field, which is the
// high-water mark ("maximum resident set size") rather than the instantaneous
// RSS, so successive calls never report a smaller number. These values are
// approximate, and are only used for logging purposes.
size_t inline get_rss_bytes() {
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
  struct rusage r_usage;
  if (getrusage(RUSAGE_SELF, &r_usage) == 0) {
    // ru_maxrss is a `long` reported in kilobytes on Linux. Convert to size_t
    // before scaling so the multiplication is not done in a (possibly 32-bit)
    // signed long, which would overflow once the peak reaches 2 GiB.
    return static_cast<size_t>(r_usage.ru_maxrss) * 1024;
  }
#endif // __linux__ || __ANDROID__ || __unix__
  // Unsupported platform like Windows, or getrusage() failed.
  // __APPLE__ and __MACH__ are not supported because r_usage.ru_maxrss does not
  // consistently return kbytes on macOS. On older versions of macOS, it
  // returns bytes, but on newer versions it returns kbytes. Need to figure out
  // when this changed.
  return 0;
}
4771
} // namespace llm
4872
} // namespace extension
4973
} // namespace executorch
@@ -53,6 +77,7 @@ namespace executor {
5377
namespace util {
5478
// TODO(T197294990): Remove these deprecated aliases once all users have moved
5579
// to the new `::executorch` namespaces.
80+
using ::executorch::extension::llm::get_rss_bytes;
5681
using ::executorch::extension::llm::safe_printf;
5782
using ::executorch::extension::llm::time_in_ms;
5883
} // namespace util

0 commit comments

Comments
 (0)