[Llama] Dump RSS info for Linux

digantdesai · digantdesai · commit 25517187edac · 2024-09-05T22:36:54.000-05:00
diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp
@@ -153,6 +153,11 @@ Error Runner::generate(
     stats_.model_load_end_ms = util::time_in_ms();
   }
 
+  ET_LOG(
+      Info,
+      "RSS after loading model: %f MiB (0 if unsupported)",
+      util::get_rss_bytes() / 1024.0 / 1024.0);
+
   // Wrap the token_callback with print function
   std::function<void(const std::string&)> wrapped_callback =
       [token_callback](const std::string& piece) {
@@ -213,6 +218,10 @@ Error Runner::generate(
 
   // print the first token from prefill. No prev_token so use cur_token for it.
   wrapped_callback(ET_UNWRAP(tokenizer_->decode(cur_token, cur_token)));
+  ET_LOG(
+      Info,
+      "RSS after prompt prefill: %f MiB (0 if unsupported)",
+      util::get_rss_bytes() / 1024.0 / 1024.0);
 
   // start the main loop
   prompt_tokens.push_back(cur_token);
@@ -221,6 +230,10 @@ Error Runner::generate(
 
   stats_.inference_end_ms = util::time_in_ms();
   printf("\n");
+  ET_LOG(
+      Info,
+      "RSS after finishing text generation: %f MiB (0 if unsupported)",
+      util::get_rss_bytes() / 1024.0 / 1024.0);
 
   if (num_prompt_tokens + num_generated_tokens == seq_len) {
     ET_LOG(Info, "Sequence length (%i tokens) reached!", seq_len);
diff --git a/examples/models/llava/runner/llava_runner.cpp b/examples/models/llava/runner/llava_runner.cpp
@@ -69,6 +69,10 @@ Error LlavaRunner::load() {
       &stats_);
 
   stats_.model_load_end_ms = util::time_in_ms();
+  ET_LOG(
+      Info,
+      "RSS after loading model: %f MiB (0 if unsupported)",
+      util::get_rss_bytes() / 1024.0 / 1024.0);
   return Error::Ok;
 }
 
@@ -120,10 +124,20 @@ Error LlavaRunner::generate(
       ET_UNWRAP(text_prefiller_->prefill(user_prompt_tokens, pos));
   pos += num_user_tokens;
 
+  ET_LOG(
+      Info,
+      "RSS after prompt and image prefill: %f MiB (0 if unsupported)",
+      util::get_rss_bytes() / 1024.0 / 1024.0);
+
   // Generate tokens
   int64_t num_generated_tokens = ET_UNWRAP(text_token_generator_->generate(
       {prefill_next_token}, pos, seq_len, wrapped_callback));
 
+  ET_LOG(
+      Info,
+      "RSS after finishing text generation: %f MiB (0 if unsupported)",
+      util::get_rss_bytes() / 1024.0 / 1024.0);
+
   // Bookkeeping
   stats_.num_prompt_tokens = num_preset_tokens + num_user_tokens;
   stats_.num_generated_tokens = num_generated_tokens;
diff --git a/extension/llm/runner/util.h b/extension/llm/runner/util.h
@@ -10,6 +10,9 @@
 #include <stdio.h>
 #include <time.h>
 #include <cctype>
+#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
+#include <sys/resource.h>
+#endif
 
 namespace executorch {
 namespace extension {
@@ -44,6 +47,27 @@ long inline time_in_ms() {
   return time.tv_sec * 1000 + time.tv_nsec / 1000000;
 }
 
+// ----------------------------------------------------------------------------
+// utilities: memory usage
+
+// Returns the peak (high-water mark) RSS of this process in bytes, or 0 if
+// unsupported. RSS: Resident Set Size, the amount of the process's memory
+// held in RAM. The value comes from getrusage()'s ru_maxrss, so it never
+// decreases; it is approximate and only intended for logging.
+size_t inline get_rss_bytes() {
+#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
+  struct rusage r_usage;
+  if (getrusage(RUSAGE_SELF, &r_usage) == 0) {
+    // ru_maxrss is a signed long in kilobytes; scale in size_t, not long.
+    return static_cast<size_t>(r_usage.ru_maxrss) * 1024;
+  }
+#endif // __linux__ || __ANDROID__ || __unix__
+  // Unsupported platform like Windows, or getrusage() failed.
+  // __APPLE__ and __MACH__ are not supported because ru_maxrss is not
+  // consistently in kbytes on macOS: older versions report bytes, newer ones
+  // kbytes. Need to figure out when this changed.
+  return 0;
+}
 } // namespace llm
 } // namespace extension
 } // namespace executorch
@@ -53,6 +77,7 @@ namespace executor {
 namespace util {
 // TODO(T197294990): Remove these deprecated aliases once all users have moved
 // to the new `::executorch` namespaces.
+using ::executorch::extension::llm::get_rss_bytes;
 using ::executorch::extension::llm::safe_printf;
 using ::executorch::extension::llm::time_in_ms;
 } // namespace util