llvm · mtrofin · May 9, 2024 · May 9, 2024 · May 9, 2024 · May 9, 2024
diff --git a/compiler-rt/lib/ctx_profile/CMakeLists.txt b/compiler-rt/lib/ctx_profile/CMakeLists.txt
@@ -5,6 +5,7 @@ set(CTX_PROFILE_SOURCES
   )
 
 set(CTX_PROFILE_HEADERS
+  CtxInstrContextNode.h
   CtxInstrProfiling.h
   )
 

diff --git a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
@@ -0,0 +1,116 @@
+//===--- CtxInstrContextNode.h - Contextual Profile Node --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//==============================================================================
+//
+// NOTE!
+// llvm/lib/ProfileData/CtxInstrContextNode.h and
+//   compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
+// must be exact copies of each other
+//
+// compiler-rt creates these objects as part of the instrumentation runtime for
+// contextual profiling. LLVM only consumes them to convert a contextual tree
+// to a bitstream.
+//
+//==============================================================================
+
+/// The contextual profile is a directed tree where each node has one parent. A
+/// node (ContextNode) corresponds to a function activation. The root of the
+/// tree is at a function that was marked as entrypoint to the compiler. A node
+/// stores counter values for edges and a vector of subcontexts. These are the
+/// contexts of callees. The index in the subcontext vector corresponds to the
+/// index of the callsite (as was instrumented via llvm.instrprof.callsite). At
+/// that index we find a linked list, potentially empty, of ContextNodes. Direct
+/// calls will have 0 or 1 values in the linked list, but indirect callsites may
+/// have more.
+///
+/// The ContextNode has a fixed sized header describing it - the GUID of the
+/// function, the size of the counter and callsite vectors. It is also an
+/// (intrusive) linked list for the purposes of the indirect call case above.
+///
+/// Allocation is expected to happen on an Arena. The allocation lays out inline
+/// the counter and subcontexts vectors. The class offers APIs to correctly
+/// reference the latter.
+///
+/// The layout is as follows:
+///
+/// [[declared fields][counters vector][vector of ptrs to subcontexts]]
+///
+/// See also documentation on the counters and subContexts members below.
+///
+/// The structure of the ContextNode is known to LLVM, because LLVM needs to:
+///   (1) increment counts, and
+///   (2) form a GEP for the position in the subcontext list of a callsite
+/// This means changes to LLVM contextual profile lowering and changes here
+/// must be coupled.
+/// Note: the header content isn't interesting to LLVM (other than its size)
+///
+/// Part of contextual collection is the notion of "scratch contexts". These are
+/// buffers that are "large enough" to allow for memory-safe accesses during
+/// counter increments - meaning the counter increment code in LLVM doesn't need
+/// to be concerned with memory safety. Their subcontexts never get populated,
+/// though. The runtime code here produces and recognizes them.
+
+#ifndef LLVM_LIB_PROFILEDATA_CTXINSTRCONTEXTNODE_H
+#define LLVM_LIB_PROFILEDATA_CTXINSTRCONTEXTNODE_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace llvm {
+namespace ctx_profile {
+using GUID = uint64_t;
+
+class ContextNode final {
+  const GUID Guid;
+  ContextNode *const Next;
+  const uint32_t NrCounters;
+  const uint32_t NrCallsites;
+
+public:
+  ContextNode(GUID Guid, uint32_t NrCounters, uint32_t NrCallsites,
+              ContextNode *Next = nullptr)
+      : Guid(Guid), Next(Next), NrCounters(NrCounters),
+        NrCallsites(NrCallsites) {}
+
+  static inline size_t getAllocSize(uint32_t NrCounters, uint32_t NrCallsites) {
+    return sizeof(ContextNode) + sizeof(uint64_t) * NrCounters +
+           sizeof(ContextNode *) * NrCallsites;
+  }
+
+  // The counters vector starts right after the static header.
+  uint64_t *counters() {
+    ContextNode *addr_after = &(this[1]);
+    return reinterpret_cast<uint64_t *>(addr_after);
+  }
+
+  uint32_t counters_size() const { return NrCounters; }
+  uint32_t callsites_size() const { return NrCallsites; }
+
+  const uint64_t *counters() const {
+    return const_cast<ContextNode *>(this)->counters();
+  }
+
+  // The subcontexts vector starts right after the end of the counters vector.
+  ContextNode **subContexts() {
+    return reinterpret_cast<ContextNode **>(&(counters()[NrCounters]));
+  }
+
+  ContextNode *const *subContexts() const {
+    return const_cast<ContextNode *>(this)->subContexts();
+  }
+
+  GUID guid() const { return Guid; }
+  ContextNode *next() const { return Next; }
+
+  size_t size() const { return getAllocSize(NrCounters, NrCallsites); }
+
+  uint64_t entrycount() const { return counters()[0]; }
+};
+} // namespace ctx_profile
+} // namespace llvm
+#endif
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -90,6 +90,26 @@ bool validate(const ContextRoot *Root) {
   }
   return true;
 }
+
+inline ContextNode *allocContextNode(char *Place, GUID Guid,
+                                     uint32_t NrCounters, uint32_t NrCallsites,
+                                     ContextNode *Next = nullptr) {
+  assert(reinterpret_cast<uint64_t>(Place) % ExpectedAlignment == 0);
+  return new (Place) ContextNode(Guid, NrCounters, NrCallsites, Next);
+}
+
+void resetContextNode(ContextNode &Node) {
+  // FIXME(mtrofin): this is std::memset, which we can probably use if we
+  // drop/reduce the dependency on sanitizer_common.
+  for (uint32_t I = 0; I < Node.counters_size(); ++I)
+    Node.counters()[I] = 0;
+  for (uint32_t I = 0; I < Node.callsites_size(); ++I)
+    for (auto *Next = Node.subContexts()[I]; Next; Next = Next->next())
+      resetContextNode(*Next);
+}
+
+void onContextEnter(ContextNode &Node) { ++Node.counters()[0]; }
+
 } // namespace
 
 // the scratch buffer - what we give when we can't produce a real context (the
@@ -134,27 +154,9 @@ void Arena::freeArenaList(Arena *&A) {
   A = nullptr;
 }
 
-inline ContextNode *ContextNode::alloc(char *Place, GUID Guid,
-                                       uint32_t NrCounters,
-                                       uint32_t NrCallsites,
-                                       ContextNode *Next) {
-  assert(reinterpret_cast<uint64_t>(Place) % ExpectedAlignment == 0);
-  return new (Place) ContextNode(Guid, NrCounters, NrCallsites, Next);
-}
-
-void ContextNode::reset() {
-  // FIXME(mtrofin): this is std::memset, which we can probably use if we
-  // drop/reduce the dependency on sanitizer_common.
-  for (uint32_t I = 0; I < NrCounters; ++I)
-    counters()[I] = 0;
-  for (uint32_t I = 0; I < NrCallsites; ++I)
-    for (auto *Next = subContexts()[I]; Next; Next = Next->Next)
-      Next->reset();
-}
-
 // If this is the first time we hit a callsite with this (Guid) particular
 // callee, we need to allocate.
-ContextNode *getCallsiteSlow(uint64_t Guid, ContextNode **InsertionPoint,
+ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
                              uint32_t NrCounters, uint32_t NrCallsites) {
   auto AllocSize = ContextNode::getAllocSize(NrCounters, NrCallsites);
   auto *Mem = __llvm_ctx_profile_current_context_root->CurrentMem;
@@ -169,8 +171,8 @@ ContextNode *getCallsiteSlow(uint64_t Guid, ContextNode **InsertionPoint,
         Mem->allocateNewArena(getArenaAllocSize(AllocSize), Mem);
     AllocPlace = Mem->tryBumpAllocate(AllocSize);
   }
-  auto *Ret = ContextNode::alloc(AllocPlace, Guid, NrCounters, NrCallsites,
-                                 *InsertionPoint);
+  auto *Ret = allocContextNode(AllocPlace, Guid, NrCounters, NrCallsites,
+                               *InsertionPoint);
   *InsertionPoint = Ret;
   return Ret;
 }
@@ -224,7 +226,7 @@ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
                         "Context: %p, Asked: %lu %u %u, Got: %lu %u %u \n",
                         Ret, Guid, NrCallsites, NrCounters, Ret->guid(),
                         Ret->callsites_size(), Ret->counters_size());
-  Ret->onEntry();
+  onContextEnter(*Ret);
   return Ret;
 }
 
@@ -241,8 +243,8 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NrCounters,
   auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed));
   Root->FirstMemBlock = M;
   Root->CurrentMem = M;
-  Root->FirstNode = ContextNode::alloc(M->tryBumpAllocate(Needed), Guid,
-                                       NrCounters, NrCallsites);
+  Root->FirstNode = allocContextNode(M->tryBumpAllocate(Needed), Guid,
+                                     NrCounters, NrCallsites);
   AllContextRoots.PushBack(Root);
 }
 
@@ -254,7 +256,7 @@ ContextNode *__llvm_ctx_profile_start_context(
   }
   if (Root->Taken.TryLock()) {
     __llvm_ctx_profile_current_context_root = Root;
-    Root->FirstNode->onEntry();
+    onContextEnter(*Root->FirstNode);
     return Root->FirstNode;
   }
   // If this thread couldn't take the lock, return scratch context.
@@ -281,13 +283,13 @@ void __llvm_ctx_profile_start_collection() {
     for (auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next())
       ++NrMemUnits;
 
-    Root->FirstNode->reset();
+    resetContextNode(*Root->FirstNode);
   }
   __sanitizer::Printf("[ctxprof] Initial NrMemUnits: %zu \n", NrMemUnits);
 }
 
-bool __llvm_ctx_profile_fetch(
-    void *Data, bool (*Writer)(void *W, const __ctx_profile::ContextNode &)) {
+bool __llvm_ctx_profile_fetch(void *Data,
+                              bool (*Writer)(void *W, const ContextNode &)) {
   assert(Writer);
   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
       &AllContextsMutex);
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,6 +5,7 @@ set(CTX_PROFILE_SOURCES @@
       )
     set(CTX_PROFILE_HEADERS
+      CtxInstrContextNode.h
       CtxInstrProfiling.h
       )
@@ Expand Down @@