Commit 5b8dc7c

[mlgo] Introduce an "InteractiveModelRunner"
This is a model runner for ML researchers using environments like CompilerGym. In such environments, researchers host the compiler and want to be able to observe the problem space (features) at each decision step of some optimization pass. At that point, the compiler is stopped, waiting for the host to make a decision and provide advice back, after which the compiler resumes its normal operation, and so on.

The InteractiveModelRunner supports this scenario for the feature set exposed by the compiler at a given time. It uses 2 files - ideally FIFO pipes - one to pass data to the host, the other to get advice back from the host. This means the scenario is supported with no special dependencies. Creating and deleting the files is the responsibility of the host. Hooking this model evaluator up to an MLGO-ed pass is the responsibility of the pass author; subsequent patches will do so for the current set of MLGO passes, and will offer an API to easily "just opt in" by default when MLGO-ing a new pass.

The data protocol is that of the training logger: the host sees a training log doled out observation by observation by reading from one of the files, and passes back its advice as a serialized tensor (i.e. a memory dump of the tensor value) via the other file.

There are some differences with respect to the log seen during training: the interactive model doesn't currently include the outcome (because it should be identical to the decision, and it's also not present in "release" mode), and partial rewards aren't currently communicated back.

The assumption - just like with the training logger - is that the host is co-located, thus avoiding any endianness concerns. In a distributed environment, it is up to the hosting infrastructure to mediate that.

Differential Revision: https://reviews.llvm.org/D142642
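For illustration only (not part of this commit), a host-side advisor loop might look like the sketch below. The pipe paths, the fixed three-int64 feature layout, and the trivial policy are all hypothetical; the framing - a JSON header line, a JSON context line, then per observation a JSON line, the raw tensor bytes, and a trailing newline - follows the unit test added in this commit.

#include <cstdint>
#include <fstream>
#include <string>

int main() {
  // The host creates these FIFOs (e.g. with mkfifo) and passes their names
  // to the compiler; the paths are placeholders.
  std::ifstream FromCompiler("/tmp/to-host"); // the compiler's outbound pipe
  std::ofstream ToCompiler("/tmp/from-host"); // the compiler's inbound pipe

  std::string Line;
  std::getline(FromCompiler, Line); // JSON header listing the feature specs
  std::getline(FromCompiler, Line); // JSON context line

  // One iteration per decision point.
  while (std::getline(FromCompiler, Line)) { // JSON observation line
    // Assumed layout: 3 scalar int64 features. A real host would derive the
    // layout from the header it just read.
    int64_t Features[3] = {};
    FromCompiler.read(reinterpret_cast<char *>(Features), sizeof(Features));
    FromCompiler.get(); // consume the newline that follows the tensor dump

    // Stand-in policy; a real host would query its agent here.
    float Advice = Features[0] > 0 ? 1.0f : 0.0f;
    ToCompiler.write(reinterpret_cast<const char *>(&Advice), sizeof(Advice));
    ToCompiler.flush(); // unblock the compiler, which is waiting for advice
  }
  return 0;
}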
1 parent aad5984 commit 5b8dc7c

File tree: 9 files changed (+309, -1 lines)

llvm/include/llvm/Analysis/InteractiveModelRunner.h

Lines changed: 61 additions & 0 deletions

@@ -0,0 +1,61 @@
//===- InteractiveModelRunner.h ---- "gym" ML model runner -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//

#ifndef LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H
#define LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H

#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

namespace llvm {

/// An MLModelRunner that asks for advice from an external agent, or host. It
/// uses 2 files - ideally named pipes - one to send data to that agent, and
/// one to receive advice.
/// The data exchange uses the training logger (Utils/TrainingLogger.h) format.
/// Specifically, the compiler will send the log header, set the context, and
/// send observations; the host is expected to reply after each observation
/// with a binary buffer conforming to the shape of the advice tensor.
/// Interleaved, the data closely resembles a training log where we don't
/// capture the reward signal.
///
/// Note that the correctness of the received data is the responsibility of
/// the host. In particular, if insufficient data is sent, the compiler will
/// block while waiting for advice.
class InteractiveModelRunner : public MLModelRunner {
public:
  InteractiveModelRunner(LLVMContext &Ctx,
                         const std::vector<TensorSpec> &Inputs,
                         const TensorSpec &Advice, StringRef OutboundName,
                         StringRef InboundName);

  static bool classof(const MLModelRunner *R) {
    return R->getKind() == MLModelRunner::Kind::Interactive;
  }
  void switchContext(StringRef Name) {
    Log.switchContext(Name);
    Log.flush();
  }

private:
  void *evaluateUntyped() override;
  const std::vector<TensorSpec> InputSpecs;
  const TensorSpec OutputSpec;
  std::error_code OutEC;
  std::error_code InEC;
  raw_fd_stream Inbound;
  std::vector<char> OutputBuffer;
  Logger Log;
};
} // namespace llvm
#endif // LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H
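As a rough sketch of how a pass author might hook this runner up (the feature names, values, and pipe paths below are placeholders; getTensor<T>() and evaluate<T>() come from the MLModelRunner base class, as exercised by the unit test in this commit):

std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
                               TensorSpec::createSpec<int64_t>("b", {1})};
TensorSpec Advice = TensorSpec::createSpec<float>("advice", {1});
InteractiveModelRunner Runner(Ctx, Inputs, Advice,
                              /*OutboundName=*/"/tmp/to-host",
                              /*InboundName=*/"/tmp/from-host");
Runner.switchContext("some_function");
// Fill in the features for this decision point...
*Runner.getTensor<int64_t>(0) = 42;
*Runner.getTensor<int64_t>(1) = 43;
// ...then block until the host writes sizeof(float) bytes of advice.
float Decision = Runner.evaluate<float>();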

llvm/include/llvm/Analysis/MLModelRunner.h

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@ class MLModelRunner {
     return (const_cast<MLModelRunner *>(this))->getTensorUntyped(Index);
   }
 
-  enum class Kind : int { Unknown, Release, Development, NoOp };
+  enum class Kind : int { Unknown, Release, Development, NoOp, Interactive };
   Kind getKind() const { return Type; }
 
 protected:

llvm/include/llvm/Analysis/TensorSpec.h

Lines changed: 3 additions & 0 deletions

@@ -103,6 +103,9 @@ class TensorSpec final {
   size_t ElementSize = 0;
 };
 
+/// For debugging.
+std::string tensorValueToString(const char *Buffer, const TensorSpec &Spec);
+
 /// Construct a TensorSpec from a JSON dictionary of the form:
 /// { "name": <string>,
 ///   "port": <int>,

llvm/include/llvm/Analysis/Utils/TrainingLogger.h

Lines changed: 1 addition & 0 deletions

@@ -116,6 +116,7 @@ class Logger final {
   void switchContext(StringRef Name);
   void startObservation();
   void endObservation();
+  void flush() { OS->flush(); }
 
  const std::string &currentContext() const { return CurrentContext; }

llvm/lib/Analysis/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -76,6 +76,7 @@ add_llvm_component_library(LLVMAnalysis
   InstCount.cpp
   InstructionPrecedenceTracking.cpp
   InstructionSimplify.cpp
+  InteractiveModelRunner.cpp
   Interval.cpp
   IntervalPartition.cpp
   LazyBranchProbabilityInfo.cpp
llvm/lib/Analysis/InteractiveModelRunner.cpp

Lines changed: 77 additions & 0 deletions

@@ -0,0 +1,77 @@
//===- InteractiveModelRunner.cpp - interactive ML model runner ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A runner that communicates with an external agent via 2 file descriptors.
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define _IMR_CL_VALS(T, N) clEnumValN(TensorType::N, #T, #T),

static cl::opt<TensorType> DebugReply(
    "interactive-model-runner-echo-type", cl::init(TensorType::Invalid),
    cl::Hidden,
    cl::desc("The InteractiveModelRunner will echo back to stderr "
             "the data received from the host "
             "as the specified type (for debugging purposes)."),
    cl::values(SUPPORTED_TENSOR_TYPES(_IMR_CL_VALS)
                   clEnumValN(TensorType::Invalid, "disable", "Don't echo")));

#undef _IMR_CL_VALS

InteractiveModelRunner::InteractiveModelRunner(
    LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs,
    const TensorSpec &Advice, StringRef OutboundName, StringRef InboundName)
    : MLModelRunner(Ctx, MLModelRunner::Kind::Interactive, Inputs.size()),
      InputSpecs(Inputs), OutputSpec(Advice), Inbound(InboundName, InEC),
      OutputBuffer(OutputSpec.getTotalTensorBufferSize()),
      Log(std::make_unique<raw_fd_ostream>(OutboundName, OutEC), InputSpecs,
          Advice, /*IncludeReward=*/false) {
  if (InEC) {
    Ctx.emitError("Cannot open inbound file: " + InEC.message());
    return;
  }
  if (OutEC) {
    Ctx.emitError("Cannot open outbound file: " + OutEC.message());
    return;
  }
  // Just like in the no-inference case, this will allocate an appropriately
  // sized buffer.
  for (size_t I = 0; I < InputSpecs.size(); ++I)
    setUpBufferForTensor(I, InputSpecs[I], nullptr);
  Log.flush();
}

void *InteractiveModelRunner::evaluateUntyped() {
  Log.startObservation();
  for (size_t I = 0; I < InputSpecs.size(); ++I)
    Log.logTensorValue(I, reinterpret_cast<const char *>(getTensorUntyped(I)));
  Log.endObservation();
  Log.flush();

  size_t InsPoint = 0;
  char *Buff = OutputBuffer.data();
  const size_t Limit = OutputBuffer.size();
  while (InsPoint < Limit) {
    auto Read = Inbound.read(Buff + InsPoint, OutputBuffer.size() - InsPoint);
    if (Read < 0) {
      Ctx.emitError("Failed reading from inbound file");
      break;
    }
    InsPoint += Read;
  }
  if (DebugReply != TensorType::Invalid)
    dbgs() << tensorValueToString(OutputBuffer.data(), OutputSpec);
  return OutputBuffer.data();
}
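Since creating and deleting the files is the host's responsibility, a POSIX host might set the pipes up along these lines before launching the compiler (a sketch; the paths are placeholders):

#include <cstdio>     // perror
#include <sys/stat.h> // mkfifo
#include <unistd.h>   // unlink

// Create both FIFOs before invoking the compiler; remove them when done.
bool setUpPipes() {
  if (mkfifo("/tmp/to-host", 0600) != 0 ||
      mkfifo("/tmp/from-host", 0600) != 0) {
    perror("mkfifo");
    return false;
  }
  return true;
}

void tearDownPipes() {
  unlink("/tmp/to-host");
  unlink("/tmp/from-host");
}

For debugging, the hidden -interactive-model-runner-echo-type flag defined above accepts one of the supported tensor type names (e.g. float): when set, each advice buffer received from the host is echoed to stderr via tensorValueToString.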

llvm/lib/Analysis/TensorSpec.cpp

Lines changed: 19 additions & 0 deletions

@@ -10,8 +10,10 @@
 // utils.
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Config/config.h"
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/TensorSpec.h"
 #include "llvm/Support/CommandLine.h"

@@ -102,4 +104,21 @@ std::optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
   return std::nullopt;
 }
 
+std::string tensorValueToString(const char *Buffer, const TensorSpec &Spec) {
+  switch (Spec.type()) {
+#define _IMR_DBG_PRINTER(T, N)                                                \
+  case TensorType::N: {                                                       \
+    const T *TypedBuff = reinterpret_cast<const T *>(Buffer);                 \
+    auto R = llvm::make_range(TypedBuff, TypedBuff + Spec.getElementCount()); \
+    return llvm::join(                                                        \
+        llvm::map_range(R, [](T V) { return std::to_string(V); }), ",");      \
+  }
+    SUPPORTED_TENSOR_TYPES(_IMR_DBG_PRINTER)
+#undef _IMR_DBG_PRINTER
+  case TensorType::Total:
+  case TensorType::Invalid:
+    llvm_unreachable("invalid tensor type");
+  }
+}
+
 } // namespace llvm

llvm/unittests/Analysis/MLModelRunnerTest.cpp

Lines changed: 139 additions & 0 deletions

@@ -7,10 +7,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
 #include "llvm/Analysis/NoInferenceModelRunner.h"
 #include "llvm/Analysis/ReleaseModeModelRunner.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
 
+#include <atomic>
+#include <thread>
+
 using namespace llvm;
 
 namespace llvm {

@@ -116,4 +124,135 @@ TEST(ReleaseModeRunner, ExtraFeaturesOutOfOrder) {
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(0), 1);
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(1), 2);
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(2), -3);
+}
+
+TEST(InteractiveModelRunner, Evaluation) {
+  LLVMContext Ctx;
+  // Test the interaction with an external advisor by asking for advice twice.
+  // Use simple values, since we use the Logger underneath, which is tested
+  // more extensively elsewhere.
+  std::vector<TensorSpec> Inputs{
+      TensorSpec::createSpec<int64_t>("a", {1}),
+      TensorSpec::createSpec<int64_t>("b", {1}),
+      TensorSpec::createSpec<int64_t>("c", {1}),
+  };
+  TensorSpec AdviceSpec = TensorSpec::createSpec<float>("advice", {1});
+
+  // Create the 2 files. Ideally we'd create them as named pipes, but that's
+  // not quite supported by the generic API.
+  std::error_code EC;
+  SmallString<64> FromCompilerName;
+  SmallString<64> ToCompilerName;
+  int FromCompilerFD = 0;
+  int ToCompilerFD = 0;
+  ASSERT_EQ(sys::fs::createTemporaryFile("InteractiveModelRunner_Evaluation",
+                                         "temp", FromCompilerFD,
+                                         FromCompilerName),
+            std::error_code());
+  ASSERT_EQ(sys::fs::createTemporaryFile("InteractiveModelRunner_Evaluation",
+                                         "temp", ToCompilerFD, ToCompilerName),
+            std::error_code());
+
+  raw_fd_stream FromCompiler(FromCompilerName, EC);
+  EXPECT_FALSE(EC);
+  raw_fd_ostream ToCompiler(ToCompilerName, EC);
+  EXPECT_FALSE(EC);
+  FileRemover Cleanup1(FromCompilerName);
+  FileRemover Cleanup2(ToCompilerName);
+  InteractiveModelRunner Evaluator(Ctx, Inputs, AdviceSpec, FromCompilerName,
+                                   ToCompilerName);
+
+  Evaluator.switchContext("hi");
+
+  // Helper to read headers and other JSON lines.
+  SmallVector<char, 1024> Buffer;
+  auto ReadLn = [&]() {
+    Buffer.clear();
+    while (true) {
+      char Chr = 0;
+      auto Read = FromCompiler.read(&Chr, 1);
+      EXPECT_GE(Read, 0);
+      if (!Read)
+        continue;
+      if (Chr == '\n')
+        return StringRef(Buffer.data(), Buffer.size());
+      Buffer.push_back(Chr);
+    }
+  };
+  // See include/llvm/Analysis/Utils/TrainingLogger.h
+  // First comes the header.
+  auto Header = json::parse(ReadLn());
+  EXPECT_FALSE(Header.takeError());
+  EXPECT_NE(Header->getAsObject()->getArray("features"), nullptr);
+  // Then comes the context.
+  EXPECT_FALSE(json::parse(ReadLn()).takeError());
+
+  // Since the evaluator sends the features over and then blocks waiting for
+  // an answer, we must spawn a thread playing the role of the advisor / host:
+  std::atomic<int> SeenObservations = 0;
+  std::thread Advisor([&]() {
+    EXPECT_EQ(SeenObservations, 0);
+    int64_t Features[3] = {0};
+    auto FullyRead = [&]() {
+      size_t InsPt = 0;
+      const size_t ToRead = 3 * Inputs[0].getTotalTensorBufferSize();
+      char *Buff = reinterpret_cast<char *>(Features);
+      while (InsPt < ToRead) {
+        auto Read = FromCompiler.read(Buff + InsPt, ToRead - InsPt);
+        EXPECT_GE(Read, 0);
+        InsPt += Read;
+      }
+    };
+    // Observation
+    EXPECT_FALSE(json::parse(ReadLn()).takeError());
+    // Tensor values
+    FullyRead();
+    // Then a "\n"
+    char Chr = 0;
+    while (FromCompiler.read(&Chr, 1) == 0) {
+    }
+    EXPECT_EQ(Chr, '\n');
+    EXPECT_EQ(Features[0], 42);
+    EXPECT_EQ(Features[1], 43);
+    EXPECT_EQ(Features[2], 100);
+    ++SeenObservations;
+
+    // Send the advice
+    float Advice = 42.0012;
+    ToCompiler.write(reinterpret_cast<const char *>(&Advice),
+                     AdviceSpec.getTotalTensorBufferSize());
+    ToCompiler.flush();
+
+    // Second observation, same idea as above
+    EXPECT_FALSE(json::parse(ReadLn()).takeError());
+    FullyRead();
+    while (FromCompiler.read(&Chr, 1) == 0) {
+    }
+    EXPECT_EQ(Chr, '\n');
+    EXPECT_EQ(Features[0], 10);
+    EXPECT_EQ(Features[1], -2);
+    EXPECT_EQ(Features[2], 1);
+    ++SeenObservations;
+    Advice = 50.30;
+    ToCompiler.write(reinterpret_cast<const char *>(&Advice),
+                     AdviceSpec.getTotalTensorBufferSize());
+    ToCompiler.flush();
+  });
+
+  EXPECT_EQ(SeenObservations, 0);
+  *Evaluator.getTensor<int64_t>(0) = 42;
+  *Evaluator.getTensor<int64_t>(1) = 43;
+  *Evaluator.getTensor<int64_t>(2) = 100;
+  float Ret = Evaluator.evaluate<float>();
+  EXPECT_EQ(SeenObservations, 1);
+  EXPECT_FLOAT_EQ(Ret, 42.0012);
+
+  *Evaluator.getTensor<int64_t>(0) = 10;
+  *Evaluator.getTensor<int64_t>(1) = -2;
+  *Evaluator.getTensor<int64_t>(2) = 1;
+  Ret = Evaluator.evaluate<float>();
+  EXPECT_EQ(SeenObservations, 2);
+  EXPECT_FLOAT_EQ(Ret, 50.30);
+  Advisor.join();
 }

llvm/unittests/Analysis/TensorSpecTest.cpp

Lines changed: 7 additions & 0 deletions

@@ -59,3 +59,10 @@ TEST(TensorSpecTest, TensorSpecSizesAndTypes) {
   EXPECT_EQ(Spec3DLarge.getElementByteSize(), sizeof(float));
   EXPECT_EQ(Spec1D.getElementByteSize(), sizeof(int16_t));
 }
+
+TEST(TensorSpecTest, PrintValueForDebug) {
+  std::vector<int32_t> Values{1, 3};
+  EXPECT_EQ(tensorValueToString(reinterpret_cast<const char *>(Values.data()),
+                                TensorSpec::createSpec<int32_t>("name", {2})),
+            "1,3");
+}
