llvm · labath · Dec 5, 2024 · Dec 2, 2024 · Dec 5, 2024
@@ -23,6 +23,7 @@
 #include "lldb/Utility/Timer.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/ThreadPool.h"
+#include <atomic>
 #include <optional>
 
 using namespace lldb_private;
@@ -81,44 +82,53 @@ void ManualDWARFIndex::Index() {
   Progress progress("Manually indexing DWARF", module_desc.GetData(),
                     total_progress);
 
-  std::vector<IndexSet> sets(units_to_index.size());
-
-  // Keep memory down by clearing DIEs for any units if indexing
-  // caused us to load the unit's DIEs.
-  std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
-      units_to_index.size());
-  auto parser_fn = [&](size_t cu_idx) {
-    IndexUnit(*units_to_index[cu_idx], dwp_dwarf, sets[cu_idx]);
-    progress.Increment();
-  };
-
-  auto extract_fn = [&](size_t cu_idx) {
-    clear_cu_dies[cu_idx] = units_to_index[cu_idx]->ExtractDIEsScoped();
-    progress.Increment();
-  };
-
   // Share one thread pool across operations to avoid the overhead of
   // recreating the threads.
   llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool());
+  const size_t num_threads = Debugger::GetThreadPool().getMaxConcurrency();
+
+  // Run a function for each compile unit in parallel using as many threads as
+  // are available. This is significantly faster than submiting a new task for
+  // each unit.
+  auto for_each_unit = [&](auto &&fn) {
+    std::atomic<size_t> next_cu_idx = 0;
+    auto wrapper = [&fn, &next_cu_idx, &units_to_index,
+                    &progress](size_t worker_id) {
+      size_t cu_idx;
+      while ((cu_idx = next_cu_idx.fetch_add(1, std::memory_order_relaxed)) <
+             units_to_index.size()) {
+        fn(worker_id, cu_idx, units_to_index[cu_idx]);
+        progress.Increment();
+      }
+    };
 
-  // Create a task runner that extracts dies for each DWARF unit in a
-  // separate thread.
-  // First figure out which units didn't have their DIEs already
-  // parsed and remember this.  If no DIEs were parsed prior to this index
-  // function call, we are going to want to clear the CU dies after we are
-  // done indexing to make sure we don't pull in all DWARF dies, but we need
-  // to wait until all units have been indexed in case a DIE in one
-  // unit refers to another and the indexes accesses those DIEs.
-  for (size_t i = 0; i < units_to_index.size(); ++i)
-    task_group.async(extract_fn, i);
-  task_group.wait();
+    for (size_t i = 0; i < num_threads; ++i)
+      task_group.async(wrapper, i);
 
-  // Now create a task runner that can index each DWARF unit in a
-  // separate thread so we can index quickly.
-  for (size_t i = 0; i < units_to_index.size(); ++i)
-    task_group.async(parser_fn, i);
-  task_group.wait();
+    task_group.wait();
+  };
 
+  // Extract dies for all DWARFs unit in parallel.  Figure out which units
+  // didn't have their DIEs already parsed and remember this.  If no DIEs were
+  // parsed prior to this index function call, we are going to want to clear the
+  // CU dies after we are done indexing to make sure we don't pull in all DWARF
+  // dies, but we need to wait until all units have been indexed in case a DIE
+  // in one unit refers to another and the indexes accesses those DIEs.
+  std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
+      units_to_index.size());
+  for_each_unit([&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) {
+    clear_cu_dies[idx] = unit->ExtractDIEsScoped();
+  });
+
+  // Now index all DWARF unit in parallel.
+  std::vector<IndexSet> sets(num_threads);
+  for_each_unit(
+      [this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
+        IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
+      });
+
+  // Merge partial indexes into a single index. Process each index in a set in
+  // parallel.
   auto finalize_fn = [this, &sets, &progress](NameToDIE(IndexSet::*index)) {
     NameToDIE &result = m_set.*index;
     for (auto &set : sets)