Skip to content

[Llama] Dump RSS info for Linux #5101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions examples/models/llama2/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ Error Runner::generate(
stats_.model_load_end_ms = util::time_in_ms();
}

ET_LOG(
Info,
"RSS after loading model: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

// Wrap the token_callback with print function
std::function<void(const std::string&)> wrapped_callback =
[token_callback](const std::string& piece) {
Expand Down Expand Up @@ -213,6 +218,10 @@ Error Runner::generate(

// print the first token from prefill. No prev_token so use cur_token for it.
wrapped_callback(ET_UNWRAP(tokenizer_->decode(cur_token, cur_token)));
ET_LOG(
Info,
"RSS after prompt prefill: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

// start the main loop
prompt_tokens.push_back(cur_token);
Expand All @@ -221,6 +230,10 @@ Error Runner::generate(

stats_.inference_end_ms = util::time_in_ms();
printf("\n");
ET_LOG(
Info,
"RSS after finishing text generation: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

if (num_prompt_tokens + num_generated_tokens == seq_len) {
ET_LOG(Info, "Sequence length (%i tokens) reached!", seq_len);
Expand Down
21 changes: 19 additions & 2 deletions examples/models/llava/runner/llava_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ Error LlavaRunner::generate(
ET_CHECK_OK_OR_RETURN_ERROR(load());
}

ET_LOG(
Info,
"RSS after loading model: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

// Wrap the token_callback with print function
std::function<void(const std::string&)> wrapped_callback =
[token_callback](const std::string& piece) {
Expand All @@ -149,9 +154,21 @@ Error LlavaRunner::generate(
// prefill images
prefill_images(images, pos);

ET_LOG(
Info,
"RSS after prompt and image prefill: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

// Generate tokens
return generate_from_pos(
prompt, seq_len, pos, wrapped_callback, stats_callback);
Error err =
generate_from_pos(prompt, seq_len, pos, wrapped_callback, stats_callback);

ET_LOG(
Info,
"RSS after finishing text generation: %f MiB (0 if unsupported)",
util::get_rss_bytes() / 1024.0 / 1024.0);

return err;
}

} // namespace torch::executor
25 changes: 25 additions & 0 deletions extension/llm/runner/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#include <stdio.h>
#include <time.h>
#include <cctype>
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
#include <sys/resource.h>
#endif

namespace executorch {
namespace extension {
Expand Down Expand Up @@ -44,6 +47,27 @@ long inline time_in_ms() {
return time.tv_sec * 1000 + time.tv_nsec / 1000000;
}

// ----------------------------------------------------------------------------
// utilities: memory usage

// Returns the peak RSS of this process in bytes, or 0 if not supported.
// RSS: Resident Set Size, the amount of memory held in RAM for this process.
//
// NOTE: the value is read from getrusage()'s `ru_maxrss`, which is the
// *maximum* resident set size observed so far (a high-water mark), not the
// instantaneous RSS. Consecutive calls therefore never report a smaller
// value, even after memory has been released. The value is approximate and is
// only intended for logging purposes.
//
// @return Peak RSS in bytes (always a multiple of 1024), or 0 when the
//     platform is unsupported, getrusage() fails, or no usable value is
//     reported.
size_t inline get_rss_bytes() {
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
  struct rusage r_usage;
  if (getrusage(RUSAGE_SELF, &r_usage) == 0 && r_usage.ru_maxrss > 0) {
    // ru_maxrss is a signed `long` counted in kilobytes on Linux. Widen to
    // size_t *before* scaling: multiplying in `long` overflows (undefined
    // behavior) on 32-bit targets once the peak RSS reaches 2 GiB.
    return static_cast<size_t>(r_usage.ru_maxrss) * 1024;
  }
#endif // __linux__ || __ANDROID__ || __unix__
  // Unsupported platform like Windows, or getrusage() failed.
  // __APPLE__ and __MACH__ are not supported because r_usage.ru_maxrss does not
  // consistently return kbytes on macOS. On older versions of macOS, it
  // returns bytes, but on newer versions it returns kbytes. Need to figure out
  // when this changed.
  return 0;
}
} // namespace llm
} // namespace extension
} // namespace executorch
Expand All @@ -53,6 +77,7 @@ namespace executor {
namespace util {
// TODO(T197294990): Remove these deprecated aliases once all users have moved
// to the new `::executorch` namespaces.
using ::executorch::extension::llm::get_rss_bytes;
using ::executorch::extension::llm::safe_printf;
using ::executorch::extension::llm::time_in_ms;
} // namespace util
Expand Down
Loading