[LLDB][Telemetry]Define DebuggerTelemetryInfo and related methods #127696

oontvoo · 2025-02-18T21:01:26Z

This type of entry is used to collect data about the debugger startup/exit
Tests will be added (They may need to be shell test with a "test-only" TelemetryManager plugin defined. I'm trying to figure out how to get that linked only when tests are running and not to the LLDB binary all the time.

- This type of entry is used to collect data about the debugger startup/exit - Tests will be added (They may need to be shell test with a "test-only" TelemetryManager plugin defined. I'm trying to figure out how to get that linked only when tests are running and not to the LLDB binary all the time.

llvmbot · 2025-02-18T21:02:03Z

@llvm/pr-subscribers-lldb

Author: Vy Nguyen (oontvoo)

Changes

This type of entry is used to collect data about the debugger startup/exit
Tests will be added (They may need to be shell test with a "test-only" TelemetryManager plugin defined. I'm trying to figure out how to get that linked only when tests are running and not to the LLDB binary all the time.

Full diff: https://github.com/llvm/llvm-project/pull/127696.diff

3 Files Affected:

(modified) lldb/include/lldb/Core/Telemetry.h (+78)
(modified) lldb/source/Core/Debugger.cpp (+40)
(modified) lldb/source/Core/Telemetry.cpp (+102-13)

diff --git a/lldb/include/lldb/Core/Telemetry.h b/lldb/include/lldb/Core/Telemetry.h
index b72556ecaf3c9..d6eec5dc687be 100644
--- a/lldb/include/lldb/Core/Telemetry.h
+++ b/lldb/include/lldb/Core/Telemetry.h
@@ -13,6 +13,7 @@
 #include "lldb/Interpreter/CommandReturnObject.h"
 #include "lldb/Utility/StructuredData.h"
 #include "lldb/lldb-forward.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/JSON.h"
@@ -29,6 +30,9 @@ namespace telemetry {
 
 struct LLDBEntryKind : public ::llvm::telemetry::EntryKind {
   static const llvm::telemetry::KindType BaseInfo = 0b11000;
+  static const llvm::telemetry::KindType DebuggerInfo = 0b11001;
+  // There are other entries in between (added in separate PRs)
+  static const llvm::telemetry::KindType MiscInfo = 0b11110;
 };
 
 /// Defines a convenient type for timestamp of various events.
@@ -56,6 +60,71 @@ struct LLDBBaseTelemetryInfo : public llvm::telemetry::TelemetryInfo {
   void serialize(llvm::telemetry::Serializer &serializer) const override;
 };
 
+/// Describes the exit status of a debugger.
+struct ExitDescription {
+  int exit_code;
+  std::string description;
+};
+
+struct DebuggerTelemetryInfo : public LLDBBaseTelemetryInfo {
+  std::string username;
+  std::string lldb_git_sha;
+  std::string lldb_path;
+  std::string cwd;
+  std::optional<ExitDescription> exit_desc;
+
+  DebuggerTelemetryInfo() = default;
+
+  // Provide a copy ctor because we may need to make a copy before
+  // sanitizing the data.
+  // (The sanitization might differ between different Destination classes).
+  DebuggerTelemetryInfo(const DebuggerTelemetryInfo &other) {
+    username = other.username;
+    lldb_git_sha = other.lldb_git_sha;
+    lldb_path = other.lldb_path;
+    cwd = other.cwd;
+  };
+
+  llvm::telemetry::KindType getKind() const override {
+    return LLDBEntryKind::DebuggerInfo;
+  }
+
+  static bool classof(const llvm::telemetry::TelemetryInfo *T) {
+    return T->getKind() == LLDBEntryKind::DebuggerInfo;
+  }
+
+  void serialize(llvm::telemetry::Serializer &serializer) const override;
+};
+
+/// The "catch-all" entry to store a set of non-standard data, such as
+/// error-messages, etc.
+struct MiscTelemetryInfo : public LLDBBaseTelemetryInfo {
+  /// If the event is/can be associated with a target entry,
+  /// this field contains that target's UUID.
+  /// <EMPTY> otherwise.
+  std::string target_uuid;
+
+  /// Set of key-value pairs for any optional (or impl-specific) data
+  std::map<std::string, std::string> meta_data;
+
+  MiscTelemetryInfo() = default;
+
+  MiscTelemetryInfo(const MiscTelemetryInfo &other) {
+    target_uuid = other.target_uuid;
+    meta_data = other.meta_data;
+  }
+
+  llvm::telemetry::KindType getKind() const override {
+    return LLDBEntryKind::MiscInfo;
+  }
+
+  static bool classof(const llvm::telemetry::TelemetryInfo *T) {
+    return T->getKind() == LLDBEntryKind::MiscInfo;
+  }
+
+  void serialize(llvm::telemetry::Serializer &serializer) const override;
+};
+
 /// The base Telemetry manager instance in LLDB.
 /// This class declares additional instrumentation points
 /// applicable to LLDB.
@@ -63,6 +132,11 @@ class TelemetryManager : public llvm::telemetry::Manager {
 public:
   llvm::Error preDispatch(llvm::telemetry::TelemetryInfo *entry) override;
 
+  const llvm::telemetry::Config *getConfig();
+
+  void atDebuggerStartup(DebuggerTelemetryInfo *entry);
+  void atDebuggerExit(DebuggerTelemetryInfo *entry);
+
   virtual llvm::StringRef GetInstanceName() const = 0;
   static TelemetryManager *getInstance();
 
@@ -73,6 +147,10 @@ class TelemetryManager : public llvm::telemetry::Manager {
 
 private:
   std::unique_ptr<llvm::telemetry::Config> m_config;
+  // Each debugger is assigned a unique ID (session_id).
+  // All TelemetryInfo entries emitted for the same debugger instance
+  // will get the same session_id.
+  llvm::DenseMap<Debugger *, std::string> session_ids;
   static std::unique_ptr<TelemetryManager> g_instance;
 };
 
diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp
index 18569e155b517..b458abc798a9e 100644
--- a/lldb/source/Core/Debugger.cpp
+++ b/lldb/source/Core/Debugger.cpp
@@ -62,6 +62,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Process.h"
@@ -69,6 +70,11 @@
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/raw_ostream.h"
 
+#ifdef LLVM_BUILD_TELEMETRY
+#include "lldb/Core/Telemetry.h"
+#include <chrono>
+#endif
+
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -761,12 +767,29 @@ void Debugger::InstanceInitialize() {
 
 DebuggerSP Debugger::CreateInstance(lldb::LogOutputCallback log_callback,
                                     void *baton) {
+#ifdef LLVM_BUILD_TELEMETRY
+  lldb_private::telemetry::SteadyTimePoint start_time =
+      std::chrono::steady_clock::now();
+#endif
   DebuggerSP debugger_sp(new Debugger(log_callback, baton));
   if (g_debugger_list_ptr && g_debugger_list_mutex_ptr) {
     std::lock_guard<std::recursive_mutex> guard(*g_debugger_list_mutex_ptr);
     g_debugger_list_ptr->push_back(debugger_sp);
   }
   debugger_sp->InstanceInitialize();
+
+#ifdef LLVM_BUILD_TELEMETRY
+  if (auto *telemetry_manager = telemetry::TelemetryManager::getInstance()) {
+    if (telemetry_manager->getConfig()->EnableTelemetry) {
+      lldb_private::telemetry::DebuggerTelemetryInfo entry;
+      entry.start_time = start_time;
+      entry.end_time = std::chrono::steady_clock::now();
+      entry.debugger = debugger_sp.get();
+      telemetry_manager->atDebuggerStartup(&entry);
+    }
+  }
+#endif
+
   return debugger_sp;
 }
 
@@ -985,6 +1008,10 @@ void Debugger::Clear() {
   //     static void Debugger::Destroy(lldb::DebuggerSP &debugger_sp);
   //     static void Debugger::Terminate();
   llvm::call_once(m_clear_once, [this]() {
+#ifdef LLVM_BUILD_TELEMETRY
+    lldb_private::telemetry::SteadyTimePoint quit_start_time =
+        std::chrono::steady_clock::now();
+#endif
     ClearIOHandlers();
     StopIOHandlerThread();
     StopEventHandlerThread();
@@ -1007,6 +1034,19 @@ void Debugger::Clear() {
 
     if (Diagnostics::Enabled())
       Diagnostics::Instance().RemoveCallback(m_diagnostics_callback_id);
+
+#ifdef LLVM_BUILD_TELEMETRY
+    if (auto telemetry_manager = telemetry::TelemetryManager::getInstance()) {
+      if (telemetry_manager->getConfig()->EnableTelemetry) {
+        // TBD: We *may* have to send off the log BEFORE the ClearIOHanders()?
+        lldb_private::telemetry::DebuggerTelemetryInfo entry;
+        entry.start_time = quit_start_time;
+        entry.end_time = std::chrono::steady_clock::now();
+        entry.debugger = this;
+        telemetry_manager->atDebuggerExit(&entry);
+      }
+    }
+#endif
   });
 }
 
diff --git a/lldb/source/Core/Telemetry.cpp b/lldb/source/Core/Telemetry.cpp
index 5222f76704f91..ab7d8ae0b44df 100644
--- a/lldb/source/Core/Telemetry.cpp
+++ b/lldb/source/Core/Telemetry.cpp
@@ -10,14 +10,20 @@
 
 #ifdef LLVM_BUILD_TELEMETRY
 
-#include "lldb/Core/Telemetry.h"
 #include "lldb/Core/Debugger.h"
+#include "lldb/Core/Telemetry.h"
+#include "lldb/Host/FileSystem.h"
+#include "lldb/Host/HostInfo.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/Statistics.h"
 #include "lldb/Utility/LLDBLog.h"
 #include "lldb/Utility/UUID.h"
+#include "lldb/Version/Version.h"
 #include "lldb/lldb-enumerations.h"
 #include "lldb/lldb-forward.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/RandomNumberGenerator.h"
 #include "llvm/Telemetry/Telemetry.h"
 #include <chrono>
@@ -35,15 +41,7 @@ static uint64_t ToNanosec(const SteadyTimePoint Point) {
   return std::chrono::nanoseconds(Point.time_since_epoch()).count();
 }
 
-void LLDBBaseTelemetryInfo::serialize(Serializer &serializer) const {
-  serializer.write("entry_kind", getKind());
-  serializer.write("session_id", SessionId);
-  serializer.write("start_time", ToNanosec(start_time));
-  if (end_time.has_value())
-    serializer.write("end_time", ToNanosec(end_time.value()));
-}
-
-[[maybe_unused]] static std::string MakeUUID(Debugger *debugger) {
+static std::string MakeUUID(Debugger *debugger) {
   uint8_t random_bytes[16];
   if (auto ec = llvm::getRandomBytes(random_bytes, 16)) {
     LLDB_LOG(GetLog(LLDBLog::Object),
@@ -56,16 +54,107 @@ void LLDBBaseTelemetryInfo::serialize(Serializer &serializer) const {
   return UUID(random_bytes).GetAsString();
 }
 
+void LLDBBaseTelemetryInfo::serialize(Serializer &serializer) const {
+  serializer.write("entry_kind", getKind());
+  serializer.write("session_id", SessionId);
+  serializer.write("start_time", ToNanosec(start_time));
+  if (end_time.has_value())
+    serializer.write("end_time", ToNanosec(end_time.value()));
+}
+
+void DebuggerTelemetryInfo::serialize(Serializer &serializer) const {
+  LLDBBaseTelemetryInfo::serialize(serializer);
+
+  serializer.write("username", username);
+  serializer.write("lldb_git_sha", lldb_git_sha);
+  serializer.write("lldb_path", lldb_path);
+  serializer.write("cwd", cwd);
+  if (exit_desc.has_value()) {
+    serializer.write("exit_code", exit_desc->exit_code);
+    serializer.write("exit_desc", exit_desc->description);
+  }
+}
+
+void MiscTelemetryInfo::serialize(Serializer &serializer) const {
+  LLDBBaseTelemetryInfo::serialize(serializer);
+  serializer.write("target_uuid", target_uuid);
+  serializer.beginObject("meta_data");
+  for (const auto &kv : meta_data)
+    serializer.write(kv.first, kv.second);
+  serializer.endObject();
+}
+
 TelemetryManager::TelemetryManager(std::unique_ptr<Config> config)
     : m_config(std::move(config)) {}
 
 llvm::Error TelemetryManager::preDispatch(TelemetryInfo *entry) {
-  // Do nothing for now.
-  // In up-coming patch, this would be where the manager
-  // attach the session_uuid to the entry.
+  LLDBBaseTelemetryInfo *lldb_entry =
+      llvm::dyn_cast<LLDBBaseTelemetryInfo>(entry);
+  std::string session_id = "";
+  if (Debugger *debugger = lldb_entry->debugger) {
+    auto session_id_pos = session_ids.find(debugger);
+    if (session_id_pos != session_ids.end())
+      session_id = session_id_pos->second;
+    else
+      session_id_pos->second = session_id = MakeUUID(debugger);
+  }
+  lldb_entry->SessionId = session_id;
+
   return llvm::Error::success();
 }
 
+const Config *getConfig() { return m_config.get(); }
+
+void TelemetryManager::atDebuggerStartup(DebuggerTelemetryInfo *entry) {
+  UserIDResolver &resolver = lldb_private::HostInfo::GetUserIDResolver();
+  std::optional<llvm::StringRef> opt_username =
+      resolver.GetUserName(lldb_private::HostInfo::GetUserID());
+  if (opt_username)
+    entry->username = *opt_username;
+
+  entry->lldb_git_sha =
+      lldb_private::GetVersion(); // TODO: find the real git sha?
+
+  entry->lldb_path = HostInfo::GetProgramFileSpec().GetPath();
+
+  llvm::SmallString<64> cwd;
+  if (!llvm::sys::fs::current_path(cwd)) {
+    entry->cwd = cwd.c_str();
+  } else {
+    MiscTelemetryInfo misc_info;
+    misc_info.meta_data["internal_errors"] = "Cannot determine CWD";
+    if (auto er = dispatch(&misc_info)) {
+      LLDB_LOG(GetLog(LLDBLog::Object),
+               "Failed to dispatch misc-info at startup");
+    }
+  }
+
+  if (auto er = dispatch(entry)) {
+    LLDB_LOG(GetLog(LLDBLog::Object), "Failed to dispatch entry at startup");
+  }
+}
+
+void TelemetryManager::atDebuggerExit(DebuggerTelemetryInfo *entry) {
+  // There must be a reference to the debugger at this point.
+  assert(entry->debugger != nullptr);
+
+  if (auto *selected_target =
+          entry->debugger->GetSelectedExecutionContext().GetTargetPtr()) {
+    if (!selected_target->IsDummyTarget()) {
+      const lldb::ProcessSP proc = selected_target->GetProcessSP();
+      if (proc == nullptr) {
+        // no process has been launched yet.
+        entry->exit_desc = {-1, "no process launched."};
+      } else {
+        entry->exit_desc = {proc->GetExitStatus(), ""};
+        if (const char *description = proc->GetExitDescription())
+          entry->exit_desc->description = std::string(description);
+      }
+    }
+  }
+  dispatch(entry);
+}
+
 std::unique_ptr<TelemetryManager> TelemetryManager::g_instance = nullptr;
 TelemetryManager *TelemetryManager::getInstance() { return g_instance.get(); }

oontvoo · 2025-02-18T21:04:20Z

lldb/include/lldb/Core/Telemetry.h

+  // Each debugger is assigned a unique ID (session_id).
+  // All TelemetryInfo entries emitted for the same debugger instance
+  // will get the same session_id.
+  llvm::DenseMap<Debugger *, std::string> session_ids;


This is a lazily-populated map. We are not associating the TelemetryManager with any Debugger. Rather, each TelemetryInfo entry may carry a pointer to a debugger. We compute some unique-ID for each debugger and put it in this map to re-use rather than having to re-compute everytime

Should this store the debugger id instead of the debugger pointer? Debugger will get destroyed and this seems like a foot-gun.

When the debugger is destroyed, we'd remove it from the map. When does a Debugger object get destroyed? Is it when the process dies?

(Unless you're saying there could be multiple different Debugger objects for the same entity? )

I think it could work, but I do believe its error-prone -- and maybe also unnecessary. I think we could avoid all this is we make the unique identifier of a debugger a pair. The first component would be the identifier of the TelemetryManager instance, generated when it's constructed (using random data, current time stamp, or whatever). The second component could the debugger ID (as retrieved by debugger->GetID(), which is a sequentially increasing integer). Then we don't need to store any per-debugger data,

The second component guarantees uniqueness of the identifier within a single process, the second does it "globally". If a particular backend doesn't like the fact that this is a pair, it can just concatenate them.

JDevlieghere · 2025-02-18T23:22:13Z

lldb/include/lldb/Core/Telemetry.h

+
+struct DebuggerTelemetryInfo : public LLDBBaseTelemetryInfo {
+  std::string username;
+  std::string lldb_git_sha;


You may not have a git hash. How about storing the lldb version instead?

Right - that's the general idea. But for our specific use-case, (as far as I remember) we need the git-sha to figure out which release it was (the info will be supplied at build time). I don't remember the exact detail, though.

will make this a generic "lldb_version" for now.

JDevlieghere · 2025-02-18T23:30:18Z

lldb/include/lldb/Core/Telemetry.h

+  std::string description;
+};
+
+struct DebuggerTelemetryInfo : public LLDBBaseTelemetryInfo {


Looking at this event, I notice that it contains two kinds of information:

Information coming from lldb: the version and the exit status/description.

Information coming from the environment lldb is run in: the username, the working directory and LLDB's path.

I think we should split this into two entries. For privacy reasons, I will never be interested in collecting the latter, but I'm very much interested in the former. If they're different entries, I can easily ignore on in my backend, but now I'd need to filter out individual fields.

I think we should split this into two entries.

There main reasons these were packed into one was I/O : Fewer entries to send is generally better

For privacy reasons, I will never be interested in collecting the latter, but I'm very much interested in the former. If they're different entries, I can easily ignore on in my backend, but now I'd need to filter out individual fields.

Can you clarify why ignoring a particular type of entry is any more privacy compliant than ignoring some particular fields within an entry?

The principle here is that upstream LLDB collects as much relevant data as it can - then in each vendor-specific code, one can choose which fields to keep and which to ignore (You'd put such logic in your Destination class(es)).

I have many thoughts here. In no particular order, they are:

I don't find the I/O argument particularly convincing, as I expect that any reasonable backend will want to cache/batch sending of these entries anyway

I can believe that for analysis purposes, it's nicer to have a single "this is everything I know about the environment of the session" entry. I can also understand the desire for easy filtering of unwanted data. I don't know if there's a reasonable compromise here. Group this data into two sub-structs?

the collect-then-filter approach makes sense for things which cannot be collected in any other way. However, pretty much everything being collected here is static data. There's no reason why it has to be collected at a particular location. (Like, even if we just stopped collecting the username here, a backend could just refetch it if it wanted to). Maybe we could not include things that recomputed in the backend? Or at least things that are likely to be problematic?

I still don't understand how it would be much simpler filter out some structs. Then we would still have to make the decision which fields go into the the so-called "sensitive data struct".

I imagine the vendor-specific code would look something like this

static DebuggerInfo* sanitizeFields(DebuggerInfo* entry) { // null out any fields you dont want to store // .... } Error VendorDestination::receiveEntry(DebuggerInfo* entry) { auto* newEntry = sanitizeFields(entry); storeToInternalStorage(entry); }

And yes, it's more convenient to group all the context together (otherwise in downstream data structure, we'd need to do a bunch of joining to build a complete "picture").

My arguments for separate events is that it's much easier to spot what's being collected: my backend is explicitly collecting events A, B and C and ignoring everything else. If I need to filter out fields that's a lot harder to audit. If you want to emit that as a single event in your backend, that's totally fine, but that's the responsibility of the emitter. I think it's much more important that the events are meaningful semantically.

Ah, actually, we could make all the atDebuggerStartup,atDebuggerExit, etc methods virtual.

The upstream can collect a minimal set of common data.
Then the downstream's impl can override it to collect more (or less) data as needed.
Would that be an acceptable approach?

P.S: updated the method to not collect any of the username, cwd, lldb_path (just leaving the fields empty).
(vendors can override the AtDebuggerStart() to collect those if they want to)

Okay, but the fields are still there. I know why you did this, but I think it's strange to have a field that is never set anywhere, and I definitely wouldn't want to define a field for every piece of information that anybody wants to collect. If we go in this direction (data collected downstream), then I think the fields should be defined downstream as well. One way to do that would be to wrap the upstream member in a vendor-specific event struct which includes the upstream struct as a member (or just a copy of its members). Another would be to attach them directly in the Destination class (that way the custom event struct doesn't even have to exist).

lldb/include/lldb/Core/Telemetry.h

JDevlieghere · 2025-02-18T23:32:46Z

lldb/include/lldb/Core/Telemetry.h

+  // Provide a copy ctor because we may need to make a copy before
+  // sanitizing the data.
+  // (The sanitization might differ between different Destination classes).


What's this sanitization. Does that exist yet? I thought the idea was that different emitters could decide what events to listen for? Did I miss that design change? It would be nice if we don't have to implement these trivial copy constructors.

What's this sanitization.

This goes back to your previous privacy question. It's where the Destination::receiveEntry() decides whether to delete sensitive fields before sending the entry to some custom storage. If it does want to delete the fields, it'd do so by copying the original entry, then modifying the copy. Otherwise, it'd just send the original w/o copying,

Does that exist yet?

No, it's up to the vendors to decide which fields to sanitize (because I bet we'd all disagree on this).

That makes sense to me, but it doesn't explain why we need to spell out the copying by hand. It's very easy to forget to update it (and it looks like even this implementation does not copy entries from the base class).

I thought this had something to do with this being being a polymorphic class (which usually have their copy constructors deleted -- for a reason), but AFAICT, all of the bases actually have their copy constructors (implicitly) defined. The possibility of object slicing is another complaint I have about this design (and I'm surprised it doesn't trigger any static analysis warnings).

In order to prevent object slicing, and avoid spelling out copy operations, how about we do something like this. Basically, divide the two classes in the hierarchy into two kinds:

abstract classes: these cannot be instantiated, so there's no danger of slicing. They still have a copy constructor, but they cannot be copied on their own.

concrete final classes: these can be copied (using the default copy constructor implementation). However, they cannot be inherited from, which means they cannot slice anything.

I've removed the copy ctor - agreed that it could be an impl detail. no need to spell it out here.

lldb/include/lldb/Core/Telemetry.h

JDevlieghere · 2025-02-18T23:37:05Z

lldb/include/lldb/Core/Telemetry.h

+  // Each debugger is assigned a unique ID (session_id).
+  // All TelemetryInfo entries emitted for the same debugger instance
+  // will get the same session_id.
+  llvm::DenseMap<Debugger *, std::string> session_ids;


Should this store the debugger id instead of the debugger pointer? Debugger will get destroyed and this seems like a foot-gun.

JDevlieghere · 2025-02-18T23:40:40Z

lldb/source/Core/Debugger.cpp

+#ifdef LLVM_BUILD_TELEMETRY
+  lldb_private::telemetry::SteadyTimePoint start_time =
+      std::chrono::steady_clock::now();
+#endif


I think we should wrap this into an RAII object (like Progress) so that this does the right thing automatically.

(NOT done - this is pending the result of whether we'd revert the ifdef approach and instead just make the cmake variable control llvm::telemetry::Config::EnableTelemetry)

JDevlieghere · 2025-02-18T23:42:22Z

lldb/source/Core/Debugger.cpp

+#ifdef LLVM_BUILD_TELEMETRY
+  if (auto *telemetry_manager = telemetry::TelemetryManager::getInstance()) {
+    if (telemetry_manager->getConfig()->EnableTelemetry) {
+      lldb_private::telemetry::DebuggerTelemetryInfo entry;
+      entry.start_time = start_time;
+      entry.end_time = std::chrono::steady_clock::now();
+      entry.debugger = debugger_sp.get();
+      telemetry_manager->atDebuggerStartup(&entry);
+    }
+  }
+#endif


I have to admit I didn't consider this implication when I added the CMake flag to make this configurable. I think we need to rethink this because checking the ifdef everywhere is going to get old really quickly.

Can we convince the person that asked for this to accept the solution of setting telemetry::Config::EnableTelemetry=false and trusting that it'd do the right thign?

Then we wouldn't need this cmake configurability.

I'd very much like that, but I don't know how feasible is that. However, if that doesn't work out, I think we should still figure out a way to reduce the number of ifdefs. The RAII object might be way to do that (by defining a noop raii object if telemetry is disabled).

but I don't know how feasible is that.

I would really like to understand the objection. I recall the reasoning was something like it's faster to check for the non-existence of "Telemetry" symbol. If that was the case, we can offer to define some statically defined symbol like DISABLE_TELEMETRY that would set the telemetry::Config::EnableTelemetry=false at build time and cannot be overriden.

I believe that would simplify a lot of things

When I introduced the CMake variable, I had something in mind like what we do for signposts on macOS (LLVM_SUPPORT_XCODE_SIGNPOSTS) where we have a wrapper and then compile out support when it's not available. Admittedly, that's not what I did by conditionalizing the library, so that's on me and I'm happy to rectify that.

That said, as I'm seeing how this is actually used in practice, I'm not so sure of the benefit of compiling things out, so maybe keeping the CMake variable and making it so that it's impossible to turn on at runtime if the flag is set, is good enough.

I would actually argue that the telemetry library (before the cmake flag) was very similar to the signposts setup (and all of the other cases where we have an external dependency), with the library itself being the wrapper: it exists, and can be used unconditionally, but unless some other component is also available, then it amounts to a slightly sophisticated noop.

Sure, in this case the subject matter is a lot more sensitive, and there are some differences in how it works. For example, that this "other component" takes the form of a subclass, and that no support for an actual implementation exists in this repo, but is rather meant to be added downstream. However, I think the principle is the same.

The last point (no actual concrete implementation) also makes me feel like any cmake flag would be mostly a placebo (as the only way to make this do something is to do it downstream, and at that point, you can always disable/override any protections we add here), but if that's what it takes, then that's fine. I think anything would be better than #ifdefing everything out, or building another wrapper on top of that. I just don't know what kind of a form should that take. For example would something like

TelemetryManager* getInstance() { #ifdef TELEMETRY return g_instance; #else return nullptr; #endif }

be sufficient? (if we put that in a header, we should also have a decent chance of the compiler dead-stripping all the code which depends on this)

be sufficient? (if we put that in a header, we should also have a decent chance of the compiler dead-stripping all the code which depends on this)

It could be a bit more complicated because the callers/users have to reference the structs/classes defined in the telemetry library. (The library itself is not actually 100% abstract classes. All of the TelemetryInfo* are concretely defined upstream.)

So I would argue for reverting the current if-def approach.
Specifically,

Rename BUILD_LLVM_TELEMETRY => ENABLE_TELEMETRY (which is set to TRUE by default)

In the base Config class:

struct Config { public: const bool EnableTelemetry; // Telemetry can only be enabled at runtime if both the build-flag and the runtime condition is TRUE Config(bool Enable) : EnableTelemetry(Enable && ENABLE_TELEMETRY) {} } ``` For "extra-measure", the `::getInstance` would check the ENABLE_TELEMETRY flag too, of course. WDYT? ```

JDevlieghere · 2025-02-18T23:43:19Z

lldb/source/Core/Debugger.cpp

+#ifdef LLVM_BUILD_TELEMETRY
+    if (auto telemetry_manager = telemetry::TelemetryManager::getInstance()) {
+      if (telemetry_manager->getConfig()->EnableTelemetry) {
+        // TBD: We *may* have to send off the log BEFORE the ClearIOHanders()?
+        lldb_private::telemetry::DebuggerTelemetryInfo entry;
+        entry.start_time = quit_start_time;
+        entry.end_time = std::chrono::steady_clock::now();
+        entry.debugger = this;
+        telemetry_manager->atDebuggerExit(&entry);
+      }
+    }
+#endif


This code is almost identical to the start time, this definitely an go into an RAII object that's shared between the two and calls the appropriate atDebugger method.

lldb/include/lldb/Core/Telemetry.h

Co-authored-by: Jonas Devlieghere <[email protected]>

lldb/source/Core/Telemetry.cpp

labath · 2025-02-19T11:05:26Z

lldb/source/Core/Telemetry.cpp

+  if (auto *selected_target =
+          entry->debugger->GetSelectedExecutionContext().GetTargetPtr()) {
+    if (!selected_target->IsDummyTarget()) {
+      const lldb::ProcessSP proc = selected_target->GetProcessSP();
+      if (proc == nullptr) {
+        // no process has been launched yet.
+        entry->exit_desc = {-1, "no process launched."};
+      } else {
+        entry->exit_desc = {proc->GetExitStatus(), ""};
+        if (const char *description = proc->GetExitDescription())
+          entry->exit_desc->description = std::string(description);
+      }
+    }
+  }


This is a fairly strange place to collect this kind of information. There's no guarantee that Process object will exist at the time the debugger terminates, or that it will have ran exactly once, etc.

If you're interesting in collecting the exit status, why not hook into a place that deals with that directly (somewhere around Process::SetExitStatus)?

There's no guarantee that Process object will exist at the time the debugger terminates, or that it will have ran exactly once, etc.

Yes, that's why there' s a check - if there was no process when the debugger terminates (ie proc == nullptr), then we reports taht no process has been launched.

If you're interesting in collecting the exit status, why not hook into a place that deals with that directly

This code is more interested in the exit of the debugger (ie., did the debugger exit normally or because some process crashed? or waht ...), which is why the telemetry-callback is done from Debugger's cleanup code.

There's no guarantee that Process object will exist at the time the debugger terminates, or that it will have ran exactly once, etc.

Yes, that's why there' s a check - if there was no process when the debugger terminates (ie proc == nullptr), then we reports taht no process has been launched.

And if the process was launched twice this will get the last exit code -- is that what you want? What about the situation where the process is launched and then its target is deleted (run; target delete; exit)? Or when the process is launched, but then the user switches to debugging a different target (run; target create/select; exit)?

If you're interesting in collecting the exit status, why not hook into a place that deals with that directly

This code is more interested in the exit of the debugger (ie., did the debugger exit normally or because some process crashed? or waht ...), which is why the telemetry-callback is done from Debugger's cleanup code.

I don't see how one is relevant for the other. LLDB doesn't exit because the debugged process crashed. Any debuggee exit code is WAI as far as LLDB is concerned. I can see how you might be able to gather some very rough trends based on this information, but I think there may be better ways to achieve that (for example, I think that a better proxy for "was the user able to do something useful in this session" would be "whether the process exited without hitting any breakpoints". I also don't see why this would have to be gathered here, and I suspect its because you see lldb through the eyes of the DAP protocol, where there's a mostly one-to-one correspondence between a dap connection, a debugger object, and a debugged process. LLDB is much more general than that. It's true that a lot of command line users will use LLDB in a dap-like manner, but we actually want to discourage that as longer-lived sessions give you the benefit of all having all the information cached already.

labath · 2025-02-19T11:09:08Z

lldb/source/Core/Telemetry.cpp

+    MiscTelemetryInfo misc_info;
+    misc_info.meta_data["internal_errors"] = "Cannot determine CWD";
+    if (auto er = dispatch(&misc_info)) {


Is this actually useful? All it really does is tell you that determining CWD failed, but you could also find that out by looking at whether the cwd field is empty.

labath · 2025-02-19T11:15:18Z

lldb/source/Core/Telemetry.cpp

+const Config *getConfig() { return m_config.get(); }
+
+void TelemetryManager::atDebuggerStartup(DebuggerTelemetryInfo *entry) {
+  UserIDResolver &resolver = lldb_private::HostInfo::GetUserIDResolver();


(This is somewhat related to https://github.com/llvm/llvm-project/pull/127696/files#r1961443899)
None of the information collected here is actually specific to the debugger object being created. If some tool embedding lldb creates 10 debugger objects, they will all send an entry with the same data.

It might be that's fine. I'm just saying that another way to organize this would be to have something like a "process startup" entry which reports this data, and then another one which gets sent when the debugger is created (and which only reports the time that took?)

ok - will split this into two

Co-authored-by: Pavel Labath <[email protected]>

github-actions · 2025-02-19T19:26:27Z

✅ With the latest revision this PR passed the C/C++ code formatter.

labath · 2025-02-20T10:59:07Z

I'm trying to figure out how to get that linked only when tests are running and not to the LLDB binary all the time.

I don't think you can do that without building another lldb binary. We actually have something like that in the form of the lldb-test binary. It's not an actual lldb, but it links all of the same components, and it shouldn't be a big problem to add a new component (a fake telemetry backend) that's only used in lldb-test. I think it would be reasonable to add a new lldb-test sub-command which is designed to be used for testing the telemetry backend.

However, it's use tends to be somewhat cumbersome because you have to write some code do something, and then also drive that code from the command line. I would recommend trying to test as much as possible via the unit tests (which are basically can do anything that lldb-test can, but you don't have to drive them from the command line), and use only when you really need an end-to-end experience.

oontvoo · 2025-02-20T14:19:52Z

However, it's use tends to be somewhat cumbersome because you have to write some code do something, and then also drive that code from the command line. I would recommend trying to test as much as possible via the unit tests (which are basically can do anything that lldb-test can, but you don't have to drive them from the command line), and use only when you really need an end-to-end experience.

Ok - thanks for the details! That makes it a lot easier :)

- remove collection of username,cwd,lldb_path (that can be collected in vendor-specific impl) - use DebuggerID as key rather than the debugger pointer - renaming

lldb/source/Core/Telemetry.cpp

labath · 2025-02-21T09:09:24Z

lldb/include/lldb/Core/Telemetry.h

+  std::string description;
+};
+
+struct DebuggerTelemetryInfo : public LLDBBaseTelemetryInfo {


Okay, but the fields are still there. I know why you did this, but I think it's strange to have a field that is never set anywhere, and I definitely wouldn't want to define a field for every piece of information that anybody wants to collect. If we go in this direction (data collected downstream), then I think the fields should be defined downstream as well. One way to do that would be to wrap the upstream member in a vendor-specific event struct which includes the upstream struct as a member (or just a copy of its members). Another would be to attach them directly in the Destination class (that way the custom event struct doesn't even have to exist).

labath · 2025-02-21T09:20:35Z

lldb/source/Core/Telemetry.cpp

+      llvm::dyn_cast<LLDBBaseTelemetryInfo>(entry);
+  std::string session_id = m_id;
+  if (Debugger *debugger = lldb_entry->debugger) {
+    auto session_id_pos = session_ids.find(debugger->getID());


Can we drop the map? I doubt copying the string is noticably faster than constructing it each time (particularly if you use lower level constructs like raw_string_ostream(lldb_entry->SessionId) << m_id << "_" << debugger->getID()

Or even better, use two fields like I originally suggested? Then there's no string construction and it has the advantage that you can choose how to do the analysis: I can imagine that in some situations you may want to see all events from a particular Debugger object, but in others you may want to see "everything that happens within a single process". I think that would be easier to do if the fields are separate.

oontvoo · 2025-02-24T16:39:53Z

@JDevlieghere @labath : W.R.T the telemetry-build flag, I proposed PR. Please have a look (ideally before continuing reviewing this patch since it'd simplify a few things here).
Thanks!

oontvoo · 2025-02-26T20:02:19Z

I think this is ready for review now. New changes:

Incorporate the enable-telemetry flag from the other PR to get rid of all the ifdefs
remove process-exit/startup data from this - stick to debugger's exit/startup only.
- I'd like to find the right place to report debugger's crashes (ie., in the code where it'd normally print "Please file a bug report ....")
add some RAII helper to reduce boilerplate

oontvoo · 2025-02-26T20:39:41Z

lldb/source/API/SBDebugger.cpp

+
+static void InstallCrashTelemetryReporter() {
+#if LLVM_ENABLE_TELEMETRY
+


@labath Is this the right place to add the crash-handler?

Maybe? I don't think there's a single right place to do that, as each option comes with some tradeoffs. It may be best to leave this all up to downstream..

lldb/include/lldb/Core/Telemetry.h

labath · 2025-02-27T07:16:38Z

lldb/include/lldb/Core/Telemetry.h

+    TelemetryManager *instance = TelemetryManager::GetInstance();
+    return instance != nullptr && instance->GetConfig()->EnableTelemetry;


Can we make it so that there is only one thing to check here? Is there any reason for GetInstance to return a valid object if telemetry is disabled?

If telemetry is disabled, the GetInstance() will return nullptr

BUT if GetInstance() returns a valid object, telemetry might still be disabled (at runtime), hence the necessary check.

Okay, but why does GetInstance return a valid object if telemetry is disabled (at runtime)? Why does the TelemetryManager object even get created in this case? (Maybe there is a reason for that, but in that case, I'd like to understand why)

In the original RFC, we proposed a way to toggle OFF/ON Telemetry (eg., via some simple command)

The idea is, the vendor can enable Telemetry at BUILD time. Then users can temporarily turn it off, then back on again.

(It's probably already obviously but should be pointed out that the users can only turn OFF telemetry then ON again if Telemetry was already enabled to begin with.)

Thanks for the explanation. This makes sense, though I still think it's important to have a simple and succinct way to check whether telemetry is "actually" enabled at a given moment. Right now, we sort of have that as this detail is hidden in the RAII object, but if we find ourselves writing manager && manage->IsEnabled() often, would like to create helper function for just that (like a function that doesn't even let you access the telemetry manager object unless you're actually supposed to send something).

labath · 2025-02-27T07:24:35Z

lldb/source/API/SBDebugger.cpp

@@ -226,13 +227,45 @@ lldb::SBError SBDebugger::InitializeWithErrorHandling() {
  return error;
 }

+#if LLVM_ENABLE_TELEMETRY
+#if ENABLE_BACKTRACES


Why is this conditional on ENABLE_BACKTRACES?

Because to get the "exit-description" (ie., why did it crash), we need to print the stacktrace.
(Otherwise, i'm not sure there's a more descriptive way to describe the crash)

labath · 2025-02-27T07:29:22Z

lldb/include/lldb/Core/Telemetry.h

@@ -41,6 +45,7 @@ struct LLDBBaseTelemetryInfo : public llvm::telemetry::TelemetryInfo {
  std::optional<SteadyTimePoint> end_time;
  // TBD: could add some memory stats here too?

+  uint64_t debugger_id = 0;


Does zero mean "no debugger object is associated with this entry" ?

Yes - unless the ID could be zero? 🤔
I can make this an optional if ID could be zero?

Zero is probably fine (the first debugger instance seems to get id=1), but it's worth calling out explicitly.

Ah, never mind. found it. #define LLDB_INVALID_UID UINT64_MAX
So I guess we could init this to LLDB_INVALID_UID

labath · 2025-02-27T07:52:00Z

lldb/source/API/SBDebugger.cpp

+  // Also need to pre-alloc the memory for this entry?
+  lldb_private::telemetry::DebuggerInfo entry;


The object itself is allocated on the stack, which is about the safest place you can allocate memory in a signal handler. The problem is if you have any variable length fields inside the object (strings, vectors, etc.), as they will try to allocate data on the heap. Something like this also has impact on the downstream implementation doing the actual sending, as that needs to be signal-safe as well (and I expect that to be the trickiest part).

Writing async-signal-safe code is very hard. What you're balancing here is the code complexity (and time to write it, etc.) versus the chance that you will not be able to send some of the crash notifications. It's a large scale and I don't know where your balance lies there. The only think I would like to avoid is flaky tests.

What I might try to do is let each downstream implementation make that determination for itself. Since you're not sending much in the way of actual data (a backtrace might be nice, but that could also be gathered elsewhere), maybe you don't need any create any objects here and just have a special telemetry entry point (AtCrash?) that you call and let it do whatever it wants. Or maybe we don't even need the special entry point, as the downstream implementation could register the handler -- if it wants to?

Another reason for a special entry point (or no entry point) is that you are using AtDebuggerExit in two fairly different ways. In Debugger::Clear you are calling it once per debugger, but if a crash happens (this function) you will just send one event (for all debuggers). If the crash happens sufficiently late in the shutdown process (after Debugger::Clear returns) you may even get both. Not the end of the world, but it kind of indicates that these are two different things.

Ok, that's a great point. I'll revert this change and have the crash-handler registered in downstream code (with a new AtCrash callback to avoid confusion) Thanks!

labath · 2025-02-27T08:02:48Z

lldb/source/API/SBDebugger.cpp

+
+static void InstallCrashTelemetryReporter() {
+#if LLVM_ENABLE_TELEMETRY
+


Maybe? I don't think there's a single right place to do that, as each option comes with some tradeoffs. It may be best to leave this all up to downstream..

lldb/source/Core/Telemetry.cpp

labath · 2025-02-27T08:11:32Z

lldb/source/Core/Telemetry.cpp

+  // There must be a reference to the debugger at this point.
+  assert(entry->debugger != nullptr);


This isn't going to be true for crash events.

as discussed in the other comment, the crash-handler will call a different method (e.,g atCrash())

labath · 2025-02-27T08:36:25Z

lldb/source/Core/Debugger.cpp

+      lldb_private::telemetry::TelemetryManager *manager =
+          lldb_private::telemetry::TelemetryManager::GetInstance();
+      lldb_private::telemetry::DebuggerInfo entry;
+      entry.debugger = debugger_sp.get();
+      entry.start_time = helper.GetStartTime();
+      entry.end_time = helper.GetCurrentTime();
+      manager->AtDebuggerStartup(&entry);


This still feels rather boilerplate-y. debugger, start_time and end_time are present in every field, and declaring and sending an event is also something you need to do every time. What if the interface was something like:

ScopeTelemetryCollector<DebuggerInfo> telemetry_helper(debugger_sp.get());

where the object automatically fills declares the object, fills in the common fields and then sends it off in the destructor? It can provide some accessor to the object contained within it to let the code set any additional fields.

The only slightly variable part method that gets called to dispatch the event. But I see several ways to resolve that (roughly in order of preference):

ditch AtDebuggerStartup and have it always call dispatch (the customization can happen in preDispatch)

Declare a set of typed virtual methods with a single name (virtual TypedDispatch(DebuggerInfo *); virtual TypedDispatch(OtherInfo*); ...)

Have each entry type declare the method that should get called.

Have each call site declare the method that should get called: (ScopeTelemetryCollector<DebuggerInfo> telemetry_helper(&TelemetryManager::AtDebuggerStartup, debugger_sp.get()))

This still feels rather boilerplate-y. debugger, start_time and end_time are present in every field, and declaring and sending an event is also something you need to do every time. What if the interface was something like:

ScopeTelemetryCollector<DebuggerInfo> telemetry_helper(debugger_sp.get());

where the object automatically fills declares the object, fills in the common fields and then sends it off in the destructor? It can provide some accessor to the object contained within it to let the code set any additional fields.

How does the helper object know which fields to fill in?
For this specific case, it's not much - just needed the debugger_sp. But for other cases (eg., commandinfo, it needs a bunch of local fields from callsite).
I'm worry we'll be adding too many layer of indirection.

I'd much prefer simple interface over a complex templated interface, as it's clearer what's being collected.

ditch AtDebuggerStartup and have it always call dispatch (the customization can happen in preDispatch)

These AtDebuggerStartup, etc, are how the downstream code can have overriding impl functions to collect additional data for each scenario. The preDispatch is common for ALL scenarios (debugger-startup/shutdown/command-executation/target-load/etc).

Pavel's suggestion is to fill in the common fields, i.e. the ones in LLDBBaseTelemetryInfo. I expressed my dislike for the AtDebuggerStartup and friends in another PR, and if the distinction is important, I'd argue that they should be different events. With option (1), you could do this: https://godbolt.org/z/eazvPWs46

Ok - thx for the suggestion. I applied most of it with some modification (because we want to start the timer BEFORE the debugger_sp is created ).

lldb/source/Core/Telemetry.cpp

Co-authored-by: Pavel Labath <[email protected]>

JDevlieghere · 2025-02-27T15:59:19Z

lldb/include/lldb/Core/Telemetry.h

+
+private:
+  SteadyTimePoint m_start;
+  std::stack<std::function<void()>> m_exit_funcs;


Does this actually need to be a std::stack? By default it wraps a std::deque and I don't think you need to be able to pop from both sides. You can have a std::stack that uses a std::vector underneath, but it's much more common to just push and pop to a vector directly.

no, just wanted to communicate the fact that cleanup functions are executed in the reverse order in which they were created/queued .

JDevlieghere · 2025-02-27T16:03:07Z

lldb/include/lldb/Core/Telemetry.h

  static std::unique_ptr<TelemetryManager> g_instance;
 };

+/// Helper RAII class for collecting telemetry.
+class ScopeTelemetryCollector {


Do you ever need to run more than one function? If not, can we mimic llvm::ScopeExit and make the constructors private and have a make_scope_telemtry function which takes a lambda and, depending on whether telemetry is enabled, puts it into the object and otherwise creates an object that does nothing?

We may need to run a few callbacks throughout (but not in this patch, so i'll apply the change there)

JDevlieghere · 2025-02-27T16:04:34Z

lldb/include/lldb/Core/Telemetry.h

+  SteadyTimePoint GetStartTime() { return m_start; }
+  SteadyTimePoint GetCurrentTime() { return std::chrono::steady_clock::now(); }


If the callback needs to know about the ScopeTelemetryCollector, we might as well make the start and current time arguments to the callback.

JDevlieghere · 2025-02-27T16:05:38Z

lldb/include/lldb/Core/Telemetry.h

+    if (TelemetryEnabled())
+      m_start = std::chrono::steady_clock::now();


Any downside to doing this unconditionally? It's not like m_start is an optional, so unless steady_clock::now() is super expensive, seems easier to always set the correct start time in the ctor.

JDevlieghere · 2025-02-27T17:04:06Z

lldb/source/Core/Debugger.cpp

+      lldb_private::telemetry::TelemetryManager *manager =
+          lldb_private::telemetry::TelemetryManager::GetInstance();
+      lldb_private::telemetry::DebuggerInfo entry;
+      entry.debugger = debugger_sp.get();
+      entry.start_time = helper.GetStartTime();
+      entry.end_time = helper.GetCurrentTime();
+      manager->AtDebuggerStartup(&entry);


Pavel's suggestion is to fill in the common fields, i.e. the ones in LLDBBaseTelemetryInfo. I expressed my dislike for the AtDebuggerStartup and friends in another PR, and if the distinction is important, I'd argue that they should be different events. With option (1), you could do this: https://godbolt.org/z/eazvPWs46

…debugger_info

oontvoo · 2025-02-27T20:45:21Z

@labath @JDevlieghere I think I've resolved all the comments + added test. Please have a look when you can. Thanks!

JDevlieghere

A few more small comments but the overall approach LGTM.

lldb/include/lldb/Core/Telemetry.h

JDevlieghere · 2025-02-27T20:51:03Z

lldb/include/lldb/Core/Telemetry.h

@@ -73,9 +100,50 @@ class TelemetryManager : public llvm::telemetry::Manager {

 private:
  std::unique_ptr<llvm::telemetry::Config> m_config;
+  // Each instance of a TelemetryManager is assigned a unique ID.
+  const std::string m_id;


Should we store this as a UUID (and convert it to a string in the serializer)? I think having a static method in the UUID class that generates a random UUID might be generally useful.

What use case do you have in mind? Right now this field is read-only (copied onto multiple entry objects).

So it seems jsut storing a string directly seems more straightforward.

Putting the generation code into the UUID class seems reasonable. I'm fairly ambivalent about the storage format. One reason for storing it in the UUID form might be if we think that someone could be interested in storing/transmitting the UUID in a binary form (because it's more compact?).

(One thing complicating that is the fact that you're currently appending the timestamp to the string. I don't think that's necessary because the random data should be unique enough, and the (unlikely) case of a missing random source could be handled by initializing a pseudo-random generator with the timestamp and then using that to generate a UUID)

One reason for storing it in the UUID form might be if we think that someone could be interested in storing/transmitting the UUID in a binary form

This is the TelemetryManager - it is never transmitted or serialised anywhere. So why does compactness matter?

If you mean storing the id as UUID in the TelemetryInfo object, then we'd need to update the llvm base interface. However, the reason the id was a string in the base interface was because people wanted flexibility in what the id could be in each implementation.
Also the downside of storing it as UUID object is that you may have different telemetry::Serializer objects generating different values for the same UUID (ie.,different separators?). That's potentially problematic because ultimately, the point of this field is for grouping entries from the same session. We need the key to be identical across entries and not "accidentally" differ because of serialisation methods.

If you mean storing the id as UUID in the TelemetryInfo object, then we'd need to update the llvm base interface. However, the reason the id was a string in the base interface was because people wanted flexibility in what the id could be in each implementation.

I mean that, but yes, the fact that this is in the llvm base class make this tricky. I think this is fine then.

JDevlieghere · 2025-02-27T20:52:37Z

lldb/include/lldb/Core/Telemetry.h

  static std::unique_ptr<TelemetryManager> g_instance;
 };

+/// Helper RAII class for collecting telemetry.
+template <typename I> struct ScopedDispatcher {


If you want to go with the single letter, I would go with T:

Suggested change

template <typename I> struct ScopedDispatcher {

template <typename T> struct ScopedDispatcher {

But I'd go with Info:

Suggested change

template <typename I> struct ScopedDispatcher {

template <typename Info> struct ScopedDispatcher {

done - changed to Info.
(Isn't it usually InfoT ? not sure ...)

Co-authored-by: Jonas Devlieghere <[email protected]>

lldb/include/lldb/Core/Telemetry.h

labath · 2025-02-28T10:51:44Z

lldb/include/lldb/Core/Telemetry.h

@@ -73,9 +100,50 @@ class TelemetryManager : public llvm::telemetry::Manager {

 private:
  std::unique_ptr<llvm::telemetry::Config> m_config;
+  // Each instance of a TelemetryManager is assigned a unique ID.
+  const std::string m_id;


Putting the generation code into the UUID class seems reasonable. I'm fairly ambivalent about the storage format. One reason for storing it in the UUID form might be if we think that someone could be interested in storing/transmitting the UUID in a binary form (because it's more compact?).

(One thing complicating that is the fact that you're currently appending the timestamp to the string. I don't think that's necessary because the random data should be unique enough, and the (unlikely) case of a missing random source could be handled by initializing a pseudo-random generator with the timestamp and then using that to generate a UUID)

lldb/include/lldb/Core/Telemetry.h

labath · 2025-02-28T10:54:49Z

lldb/source/Core/Debugger.cpp

+              // If we are here, then there was no error.
+              // Any abnormal exit will be reported by the crash-handler.


Let's not mention the crash handler here, since it's not upstream.

lldb/source/Core/Debugger.cpp

labath · 2025-02-28T10:58:54Z

lldb/unittests/Core/TelemetryTest.cpp

+    if (auto *fake_entry = llvm::dyn_cast<FakeTelemetryInfo>(entry)) {
+      FakeTelemetryInfo *copied = new FakeTelemetryInfo();
+      copied->msg = fake_entry->msg;
+      copied->num = fake_entry->num;
+      received_entries->push_back(copied);
+    }


As we already have a copy constructor..

Suggested change

if (auto *fake_entry = llvm::dyn_cast<FakeTelemetryInfo>(entry)) {

FakeTelemetryInfo *copied = new FakeTelemetryInfo();

copied->msg = fake_entry->msg;

copied->num = fake_entry->num;

received_entries->push_back(copied);

}

if (auto *fake_entry = llvm::dyn_cast<FakeTelemetryInfo>(entry))

received_entries->push_back(std::make_unique<FakeTelemetryInfo>(*fake_entry));

labath · 2025-02-28T11:27:04Z

lldb/source/Core/Debugger.cpp

+  lldb_private::telemetry::ScopedDispatcher<
+      lldb_private::telemetry::DebuggerInfo>
+      helper([](lldb_private::telemetry::DebuggerInfo *entry) {
+        entry->lldb_version = lldb_private::GetVersion();
+      });
  DebuggerSP debugger_sp(new Debugger(log_callback, baton));
+  helper.SetDebugger(debugger_sp.get());


This is definitely better, but lambdas tend to add some boilerplate of their own. They may be useful in some cases (if you need to set some field at the very end) but I think that in many cases (this one included) we could just set the field directly, so I want to float this idea: I think this would be shorter if we had the dispatcher object expose the telemetry entry it is managing directly:

Suggested change

lldb_private::telemetry::ScopedDispatcher<

lldb_private::telemetry::DebuggerInfo>

helper([](lldb_private::telemetry::DebuggerInfo *entry) {

entry->lldb_version = lldb_private::GetVersion();

});

DebuggerSP debugger_sp(new Debugger(log_callback, baton));

helper.SetDebugger(debugger_sp.get());

telemetry::ScopedDispatcher<telemetry::DebuggerInfo> helper;

DebuggerSP debugger_sp(new Debugger(log_callback, baton));

if (telemetry::DebuggerInfo *info = helper.GetEntry()) {// returns NULL if telemetry is disabled

info->debugger = debugger_sp.get();

info->lldb_version = lldb_private::GetVersion();

}

(and if we do need a callback, I don't see a reason why this couldn't coexist with that)

Any thoughts @JDevlieghere ?

I would prefer not exposing the managed TelemetryInfo because in the next PR, i'm going to move it to being a local var in the execute function. (Reasons being I need to dispatch/execute more than one callbacks - one at the beginning and one at the end of the function. )

On a second thought, how about we make a "NoOp" TelemetryManager that does not do anything in case when Telemetry is disabled? That way, we wouldn't need to check for the flag/ptr.

labath · 2025-02-28T11:28:35Z

lldb/unittests/Core/TelemetryTest.cpp

+  lldb_private::FakePlugin::Initialize();
+  auto *ins = TelemetryManager::GetInstance();
+  ASSERT_NE(ins, nullptr);
+  std::vector<::llvm::telemetry::TelemetryInfo *> expected_entries;


Suggested change

std::vector<::llvm::telemetry::TelemetryInfo *> expected_entries;

std::vector<::llvm::telemetry::TelemetryInfo *> received_entries;

(expected it the thing you expect to receive)

labath · 2025-02-28T11:35:52Z

lldb/include/lldb/Core/Telemetry.h

+    TelemetryManager *instance = TelemetryManager::GetInstance();
+    return instance != nullptr && instance->GetConfig()->EnableTelemetry;


Thanks for the explanation. This makes sense, though I still think it's important to have a simple and succinct way to check whether telemetry is "actually" enabled at a given moment. Right now, we sort of have that as this detail is hidden in the RAII object, but if we find ourselves writing manager && manage->IsEnabled() often, would like to create helper function for just that (like a function that doesn't even let you access the telemetry manager object unless you're actually supposed to send something).

labath · 2025-02-28T11:42:28Z

lldb/source/Core/Debugger.cpp

+            lldb_private::telemetry::TelemetryManager::GetInstance();
+        lldb_private::telemetry::DebuggerInfo entry;
+        entry.debugger = this;
+        entry.exit_desc = {0, ""}; // If we are here, there was no error.


Okay, but if we're not actually sending the crash reports upstream (which I think is the right choice), could we also avoid prescribing how a downstream implementation should represent those crashes. For example, someone might want to send the backtrace in a more structured form (list of addresses?) instead of a string. Fixed-size entries are a lot easier to handle in an async-signal context than a string (which basically cannot be used safely).

IOW, can we drop the exit_desc field from this struct?

Co-authored-by: Pavel Labath <[email protected]>

oontvoo · 2025-02-28T14:12:14Z

@labath

IOW, can we drop the exit_desc field from this struct?

Well, we need a way to distinguish the start and the exit entries
How about just keeping exit_code (which is set to zero)

I still think it's important to have a simple and succinct way to check whether telemetry is "actually" enabled at a given moment

We can have a static TelemetryManager::TelemetryCurrentEnabled() which does the check.

labath · 2025-02-28T14:21:36Z

@labath

IOW, can we drop the exit_desc field from this struct?

Well, we need a way to distinguish the start and the exit entries How about just keeping exit_code (which is set to zero)

You could distinguish them with a bool or enum field. I don't think an exit_code field makes sense if it's always going to be set to zero. I think it would make sense if you set it to GetCommandInterpreter().GetQuitExitCode().

I still think it's important to have a simple and succinct way to check whether telemetry is "actually" enabled at a given moment

We can have a static TelemetryManager::TelemetryCurrentEnabled() which does the check.

Maybe. Or maybe TelemetryManager::GetInstanceIfEnabled (not necessarily with that name) so you don't have to follow this up with a call to GetInstance(). Let's see how the code develops...

…debugger_info

oontvoo · 2025-03-03T15:07:05Z

Thanks, I'll merge this now. We can fine-tune the helper interface in the next PRs, which present slightly more complicated usecase.

…vm#127696) This type of entry is used to collect data about the debugger startup/exit Also introduced a helper ScopedDispatcher --------- Co-authored-by: Jonas Devlieghere <[email protected]> Co-authored-by: Pavel Labath <[email protected]>

oontvoo requested a review from labath February 18, 2025 21:01

oontvoo requested a review from JDevlieghere as a code owner February 18, 2025 21:01

llvmbot added the lldb label Feb 18, 2025

oontvoo commented Feb 18, 2025

View reviewed changes

JDevlieghere reviewed Feb 18, 2025

View reviewed changes

Update lldb/include/lldb/Core/Telemetry.h

496c370

Co-authored-by: Jonas Devlieghere <[email protected]>

labath reviewed Feb 19, 2025

View reviewed changes

lldb/source/Core/Telemetry.cpp Outdated Show resolved Hide resolved

labath reviewed Feb 19, 2025

View reviewed changes

lldb/source/Core/Telemetry.cpp Outdated Show resolved Hide resolved

labath reviewed Feb 19, 2025

View reviewed changes

Update lldb/source/Core/Telemetry.cpp

11efc09

Co-authored-by: Pavel Labath <[email protected]>

adddressed review comments:

5aec284

- remove collection of username,cwd,lldb_path (that can be collected in vendor-specific impl) - use DebuggerID as key rather than the debugger pointer - renaming

labath reviewed Feb 21, 2025

View reviewed changes

oontvoo added 4 commits February 24, 2025 13:39

remove unused fields

4b6a4bd

Merge branch 'main' into debugger_info

83bb7c4

addressed review comments

4dd0782

format

7ad8360

try reporting the crash from a crash handler

d7b1275

oontvoo commented Feb 26, 2025

View reviewed changes

labath reviewed Feb 27, 2025

View reviewed changes

oontvoo and others added 2 commits February 27, 2025 09:56

revert crash handler changes

302552a

Update lldb/source/Core/Telemetry.cpp

c00cd54

Co-authored-by: Pavel Labath <[email protected]>

oontvoo added 3 commits February 27, 2025 10:17

remove unused headers

597185f

remove miscinfo kindtype

41f6649

Init debugger_id to LLDB_INVALID_UID

169449b

JDevlieghere reviewed Feb 27, 2025

View reviewed changes

oontvoo added 6 commits February 27, 2025 14:54

addressed review comment

7420811

Merge branch 'debugger_info' of github.com:oontvoo/llvm-project into …

0fb078d

…debugger_info

removed unused

68f95ab

add smoke check

d586956

remove unused headers

8b7afd6

added test

21b8bdf

formatting

7776ea1

JDevlieghere reviewed Feb 27, 2025

View reviewed changes

oontvoo and others added 3 commits February 27, 2025 15:56

Update lldb/include/lldb/Core/Telemetry.h

c70bd5d

Co-authored-by: Jonas Devlieghere <[email protected]>

change template to Info

bcd793e

formatting

4ba0431

labath approved these changes Feb 28, 2025

View reviewed changes

Update lldb/source/Core/Debugger.cpp

6027a69

Co-authored-by: Pavel Labath <[email protected]>

oontvoo added 3 commits February 28, 2025 10:18

address review comments

c011531

Merge branch 'debugger_info' of github.com:oontvoo/llvm-project into …

a9fd8ad

…debugger_info

formatting

17ced2e

Merge branch 'main' into debugger_info

d07dc38

oontvoo merged commit a088b0e into llvm:main Mar 3, 2025
6 of 9 checks passed

oontvoo deleted the debugger_info branch June 2, 2025 17:17


		static void InstallCrashTelemetryReporter() {
		#if LLVM_ENABLE_TELEMETRY

		TelemetryManager *instance = TelemetryManager::GetInstance();
		return instance != nullptr && instance->GetConfig()->EnableTelemetry;

		// Also need to pre-alloc the memory for this entry?
		lldb_private::telemetry::DebuggerInfo entry;

		// There must be a reference to the debugger at this point.
		assert(entry->debugger != nullptr);

		SteadyTimePoint GetStartTime() { return m_start; }
		SteadyTimePoint GetCurrentTime() { return std::chrono::steady_clock::now(); }

		if (TelemetryEnabled())
		m_start = std::chrono::steady_clock::now();

	template <typename I> struct ScopedDispatcher {
	template <typename T> struct ScopedDispatcher {

	template <typename I> struct ScopedDispatcher {
	template <typename Info> struct ScopedDispatcher {

		// If we are here, then there was no error.
		// Any abnormal exit will be reported by the crash-handler.

	std::vector<::llvm::telemetry::TelemetryInfo *> expected_entries;
	std::vector<::llvm::telemetry::TelemetryInfo *> received_entries;

[LLDB][Telemetry]Define DebuggerTelemetryInfo and related methods #127696

[LLDB][Telemetry]Define DebuggerTelemetryInfo and related methods #127696

Uh oh!

Conversation

oontvoo commented Feb 18, 2025

Uh oh!

llvmbot commented Feb 18, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

JDevlieghere Feb 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

oontvoo Feb 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

oontvoo Feb 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

oontvoo Feb 20, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

JDevlieghere Feb 19, 2025 •

edited

Loading

oontvoo Feb 19, 2025 •

edited

Loading

oontvoo Feb 19, 2025 •

edited

Loading

oontvoo Feb 20, 2025 •

edited

Loading

github-actions bot commented Feb 19, 2025 •

edited

Loading

labath commented Feb 20, 2025 •

edited

Loading

labath Feb 21, 2025 •

edited

Loading