[mlgo] Support composite AOT-ed models #96276

Merged: 6 commits, Jun 24, 2024
96 changes: 82 additions & 14 deletions llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
@@ -14,40 +14,94 @@
 #ifndef LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
 #define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H

+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/MLModelRunner.h"
 #include "llvm/Analysis/TensorSpec.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"

 #include <memory>
 #include <vector>

 namespace llvm {

 /// ReleaseModeModelRunner - production mode implementation of the
 /// MLModelRunner. It uses an AOT-compiled SavedModel for efficient execution.
+struct EmbeddedModelRunnerOptions {
+  /// Feed and Fetch feature prefixes - i.e. a feature named "foo" will be
+  /// looked up as {FeedPrefix}foo, and an output named "bar" will be looked
+  /// up as {FetchPrefix}bar.
+  StringRef FeedPrefix = "feed_";
+  StringRef FetchPrefix = "fetch_";
+
+  /// ModelSelector is the name (recognized by the AOT-ed model) of a sub-model
+  /// to use. "" is allowed if the model doesn't support sub-models.
+  StringRef ModelSelector = "";
+
+  EmbeddedModelRunnerOptions &setFeedPrefix(StringRef Value) {
+    FeedPrefix = Value;
+    return *this;
+  }
+  EmbeddedModelRunnerOptions &setFetchPrefix(StringRef Value) {
+    FetchPrefix = Value;
+    return *this;
+  }
+  EmbeddedModelRunnerOptions &setModelSelector(StringRef Value) {
+    ModelSelector = Value;
+    return *this;
+  }
+};

 template <class TGen>
 class ReleaseModeModelRunner final : public MLModelRunner {
 public:
   /// FeatureNames' type should be an indexed collection of std::string, like
   /// std::array or std::vector, that has a size() method.
   template <class FType>
   ReleaseModeModelRunner(LLVMContext &Ctx, const FType &InputSpec,
-                         StringRef DecisionName, StringRef FeedPrefix = "feed_",
-                         StringRef FetchPrefix = "fetch_")
-      : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size()),
+                         StringRef DecisionName,
+                         const EmbeddedModelRunnerOptions &Options = {})
+      : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size() + 1),
         CompiledModel(std::make_unique<TGen>()) {
     assert(CompiledModel && "The CompiledModel should be valid");

-    for (size_t I = 0; I < InputSpec.size(); ++I) {
-      const int Index =
-          CompiledModel->LookupArgIndex(FeedPrefix.str() + InputSpec[I].name());
-      void *Buffer = nullptr;
-      if (Index >= 0)
-        Buffer = CompiledModel->arg_data(Index);
-      setUpBufferForTensor(I, InputSpec[I], Buffer);
-    }
+    // Set up the model_selector past all the InputSpecs, in all cases.
+    //   - if the model doesn't have such a feature, but the user requested it,
+    //     we report an error. Same if the model supports it but the user
+    //     didn't specify it.
+    //   - finally, we compute the MD5 hash of the user input and set the value
+    //     of the model selector to {high, low}.
+    bool InputIsPresent = true;
+    populateTensor(InputSpec.size(),
+                   TensorSpec::createSpec<uint64_t>("_model_selector", {2}),
+                   Options.FeedPrefix, InputIsPresent);
+
+    // If we hit the "report an error" cases outlined above, continue with the
+    // set-up in case there's some custom diagnostics handler installed and it
+    // doesn't promptly exit.
+    if (Options.ModelSelector.empty() && InputIsPresent)
+      Ctx.emitError(
+          "A model selector was not specified but the underlying model "
+          "requires selecting one because it exposes a _model_selector input");
+    uint64_t High = 0;
+    uint64_t Low = 0;
+    if (!Options.ModelSelector.empty()) {
+      if (!InputIsPresent)
+        Ctx.emitError("A model selector was specified but the underlying model "
+                      "does not expose a _model_selector input");
+      const auto Hash = MD5::hash(arrayRefFromStringRef(Options.ModelSelector));
+      High = Hash.high();
+      Low = Hash.low();
+    }

-    ResultIndex = CompiledModel->LookupResultIndex(FetchPrefix.str() +
+    getTensor<uint64_t>(InputSpec.size())[0] = High;
+    getTensor<uint64_t>(InputSpec.size())[1] = Low;
+    // At this point, the model selector is set up. If the user didn't provide
+    // one, but the model has a _model_selector, it'll be set to (0, 0), which
+    // the composite model should treat as an error as part of its
+    // implementation (but that should only matter if there is a custom
+    // handler that doesn't exit on error).
+    for (size_t I = 0; I < InputSpec.size(); ++I)
+      populateTensor(I, InputSpec[I], Options.FeedPrefix, InputIsPresent);
+
+    ResultIndex = CompiledModel->LookupResultIndex(Options.FetchPrefix.str() +
                                                    DecisionName.str());
     assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model");
   }
@@ -59,6 +113,20 @@ class ReleaseModeModelRunner final : public MLModelRunner {
   }

 private:
+  // Fetch the model-provided buffer for the given Spec, or let MLModelRunner
+  // create a scratch buffer. Indicate back to the caller whether the model
+  // had that input in the first place.
+  void populateTensor(size_t Pos, const TensorSpec &Spec, StringRef Prefix,
+                      bool &InputIsPresent) {
+    const int Index =
+        CompiledModel->LookupArgIndex((Prefix + Spec.name()).str());
+    void *Buffer = nullptr;
+    InputIsPresent = Index >= 0;
+    if (InputIsPresent)
+      Buffer = CompiledModel->arg_data(Index);
+    setUpBufferForTensor(Pos, Spec, Buffer);
+  }
+
   void *evaluateUntyped() override {
     CompiledModel->Run();
     return CompiledModel->result_data(ResultIndex);
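As an illustration of the new options API, here is a minimal construction sketch. MyAOTModel is a hypothetical stand-in for an AOT-generated evaluator exposing the LookupArgIndex/LookupResultIndex/arg_data/result_data/Run interface the runner expects, and the "my-submodel" selector string is likewise made up:

#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/IR/LLVMContext.h"
#include <memory>
#include <vector>

using namespace llvm;

std::unique_ptr<MLModelRunner> makeRunner(LLVMContext &Ctx) {
  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1})};
  // Select a sub-model of a composite AOT-ed model; with a monolithic model,
  // the ModelSelector option would simply be left empty.
  return std::make_unique<ReleaseModeModelRunner<MyAOTModel>>(
      Ctx, Inputs, "decision",
      EmbeddedModelRunnerOptions().setModelSelector("my-submodel"));
}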
6 changes: 5 additions & 1 deletion llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -56,6 +56,9 @@ static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
         clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold,
                    "if-caller-not-cold", "if the caller is not cold")));

+static cl::opt<std::string> ModelSelector("ml-inliner-model-selector",
+                                          cl::Hidden, cl::init(""));
+
 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
 // codegen-ed file
 #include "InlinerSizeModel.h" // NOLINT
@@ -73,7 +76,8 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
   std::unique_ptr<MLModelRunner> AOTRunner;
   if (InteractiveChannelBaseName.empty())
     AOTRunner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
-        M.getContext(), FeatureMap, DecisionName);
+        M.getContext(), FeatureMap, DecisionName,
+        EmbeddedModelRunnerOptions().setModelSelector(ModelSelector));
   else {
     auto Features = FeatureMap;
     if (InteractiveIncludeDefault)
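A usage sketch for the new hidden flag. The invocation is illustrative only: the selector name and input file are made up, and it assumes an opt binary built with LLVM_HAVE_TF_AOT_INLINERSIZEMODEL and driven through the existing -enable-ml-inliner=release switch:

opt -passes='default<O2>' -enable-ml-inliner=release \
    -ml-inliner-model-selector=my-submodel \
    input.ll -S -o out.ll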
137 changes: 125 additions & 12 deletions llvm/unittests/Analysis/MLModelRunnerTest.cpp
@@ -7,10 +7,12 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/InteractiveModelRunner.h"
 #include "llvm/Analysis/NoInferenceModelRunner.h"
 #include "llvm/Analysis/ReleaseModeModelRunner.h"
 #include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/JSON.h"
@@ -28,28 +30,31 @@ namespace llvm {
 // This is a mock of the kind of AOT-generated model evaluator. It has 2 tensors
 // of shape {1}, and 'evaluation' adds them.
 // The interface is the one expected by ReleaseModelRunner.
-class MockAOTModel final {
+class MockAOTModelBase {
+protected:
   int64_t A = 0;
   int64_t B = 0;
   int64_t R = 0;

 public:
-  MockAOTModel() = default;
-  int LookupArgIndex(const std::string &Name) {
+  MockAOTModelBase() = default;
+  virtual ~MockAOTModelBase() = default;
+
+  virtual int LookupArgIndex(const std::string &Name) {
     if (Name == "prefix_a")
       return 0;
     if (Name == "prefix_b")
       return 1;
     return -1;
   }
   int LookupResultIndex(const std::string &) { return 0; }
-  void Run() { R = A + B; }
-  void *result_data(int RIndex) {
+  virtual void Run() = 0;
+  virtual void *result_data(int RIndex) {
     if (RIndex == 0)
       return &R;
     return nullptr;
   }
-  void *arg_data(int Index) {
+  virtual void *arg_data(int Index) {
     switch (Index) {
     case 0:
       return &A;
@@ -60,6 +65,64 @@ class MockAOTModel final {
     }
   }
 };
+
+class AdditionAOTModel final : public MockAOTModelBase {
+public:
+  AdditionAOTModel() = default;
+  void Run() override { R = A + B; }
+};
+
+class DiffAOTModel final : public MockAOTModelBase {
+public:
+  DiffAOTModel() = default;
+  void Run() override { R = A - B; }
+};
+
+static const char *M1Selector = "the model that subtracts";
+static const char *M2Selector = "the model that adds";
+
+static MD5::MD5Result Hash1 = MD5::hash(arrayRefFromStringRef(M1Selector));
+static MD5::MD5Result Hash2 = MD5::hash(arrayRefFromStringRef(M2Selector));
+
+class ComposedAOTModel final {
+  DiffAOTModel M1;
+  AdditionAOTModel M2;
+  uint64_t Selector[2] = {0};
+
+  bool isHashSameAsSelector(const std::pair<uint64_t, uint64_t> &Words) const {
+    return Selector[0] == Words.first && Selector[1] == Words.second;
+  }
+
+  MockAOTModelBase *getModel() {
+    if (isHashSameAsSelector(Hash1.words()))
+      return &M1;
+    if (isHashSameAsSelector(Hash2.words()))
+      return &M2;
+    llvm_unreachable("Should be one of the two");
+  }
+
+public:
+  ComposedAOTModel() = default;
+  int LookupArgIndex(const std::string &Name) {
+    if (Name == "prefix__model_selector")
+      return 2;
+    return getModel()->LookupArgIndex(Name);
+  }
+  int LookupResultIndex(const std::string &Name) {
+    return getModel()->LookupResultIndex(Name);
+  }
+  void *arg_data(int Index) {
+    if (Index == 2)
+      return Selector;
+    return getModel()->arg_data(Index);
+  }
+  void *result_data(int RIndex) { return getModel()->result_data(RIndex); }
+  void Run() { getModel()->Run(); }
+};
+
+static EmbeddedModelRunnerOptions makeOptions() {
+  EmbeddedModelRunnerOptions Opts;
+  Opts.setFeedPrefix("prefix_");
+  return Opts;
+}
 } // namespace llvm

 TEST(NoInferenceModelRunner, AccessTensors) {
@@ -86,8 +149,8 @@ TEST(ReleaseModeRunner, NormalUse) {
   LLVMContext Ctx;
   std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
                                  TensorSpec::createSpec<int64_t>("b", {1})};
-  auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
-      Ctx, Inputs, "", "prefix_");
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+      Ctx, Inputs, "", makeOptions());
   *Evaluator->getTensor<int64_t>(0) = 1;
   *Evaluator->getTensor<int64_t>(1) = 2;
   EXPECT_EQ(Evaluator->evaluate<int64_t>(), 3);
@@ -100,8 +163,8 @@ TEST(ReleaseModeRunner, ExtraFeatures) {
   std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
                                  TensorSpec::createSpec<int64_t>("b", {1}),
                                  TensorSpec::createSpec<int64_t>("c", {1})};
-  auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
-      Ctx, Inputs, "", "prefix_");
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+      Ctx, Inputs, "", makeOptions());
   *Evaluator->getTensor<int64_t>(0) = 1;
   *Evaluator->getTensor<int64_t>(1) = 2;
   *Evaluator->getTensor<int64_t>(2) = -3;
@@ -118,8 +181,8 @@ TEST(ReleaseModeRunner, ExtraFeaturesOutOfOrder) {
       TensorSpec::createSpec<int64_t>("c", {1}),
       TensorSpec::createSpec<int64_t>("b", {1}),
   };
-  auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
-      Ctx, Inputs, "", "prefix_");
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+      Ctx, Inputs, "", makeOptions());
   *Evaluator->getTensor<int64_t>(0) = 1;  // a
   *Evaluator->getTensor<int64_t>(1) = 2;  // c
   *Evaluator->getTensor<int64_t>(2) = -3; // b
@@ -129,6 +192,56 @@
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(2), -3);
 }

+// We expect an error to be reported early if the user tries to specify a model
+// selector, but the model in fact doesn't support that.
+TEST(ReleaseModelRunner, ModelSelectorNoInputFeaturePresent) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  EXPECT_DEATH(std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+                   Ctx, Inputs, "", makeOptions().setModelSelector(M2Selector)),
+               "A model selector was specified but the underlying model does "
+               "not expose a _model_selector input");
+}
+
+TEST(ReleaseModelRunner, ModelSelectorNoSelectorGiven) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  EXPECT_DEATH(
+      std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+          Ctx, Inputs, "", makeOptions()),
+      "A model selector was not specified but the underlying model requires "
+      "selecting one because it exposes a _model_selector input");
Collaborator: @mtrofin I'm getting build warnings on MSVC due to these EXPECT_DEATH-with-make_unique calls - any way that you can refactor to avoid them, please?

    E:\llvm\llvm-project\llvm\unittests\Analysis\MLModelRunnerTest.cpp(201): warning C4858: discarding return value: This function constructs an object wrapped by a smart pointer and has no other effects; it is not useful to call this function and discard the return value.

mtrofin (Member, Author): We can exclude these for MSVC. Is it urgent? If not, I can take care of it next week, if that's OK (holiday).

Collaborator: Not urgent; if it's OK with you, I'll just add (void), which seems to silence it?

mtrofin (Member, Author): By all means - thanks!
+}
+
+// Test that we correctly set up the _model_selector tensor value. We are only
+// responsible for what happens if the user doesn't specify a value (but the
+// model supports the feature), or if the user specifies one and we correctly
+// populate the tensor - and do so upfront (in case the model implementation
+// needs that for subsequent tensor buffer lookups).
+TEST(ReleaseModelRunner, ModelSelector) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  // This explicitly asks for M1.
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+      Ctx, Inputs, "", makeOptions().setModelSelector(M1Selector));
+  *Evaluator->getTensor<int64_t>(0) = 1;
+  *Evaluator->getTensor<int64_t>(1) = 2;
+  EXPECT_EQ(Evaluator->evaluate<int64_t>(), -1);
+
+  // Ask for M2.
+  Evaluator = std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+      Ctx, Inputs, "", makeOptions().setModelSelector(M2Selector));
+  *Evaluator->getTensor<int64_t>(0) = 1;
+  *Evaluator->getTensor<int64_t>(1) = 2;
+  EXPECT_EQ(Evaluator->evaluate<int64_t>(), 3);
+
+  // Asking for a model that's not supported isn't handled by our infra, and we
+  // expect the model implementation to fail at some point.
+}

 #if defined(LLVM_ON_UNIX)
 TEST(InteractiveModelRunner, Evaluation) {
   LLVMContext Ctx;
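To make the selector hashing concrete, here is a small self-contained sketch restating the set-up logic from ReleaseModeModelRunner.h above; the helper name selectorWords is ours, not LLVM's. It maps a selector string to the two uint64_t words that get written into the _model_selector tensor:

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include <cstdint>
#include <utility>

// The MD5 hash of the selector string is split into {high, low} words; a
// composite model compares these against the hashes of the sub-model names
// it knows about (see ComposedAOTModel::getModel in the test above).
std::pair<uint64_t, uint64_t> selectorWords(llvm::StringRef Selector) {
  const llvm::MD5::MD5Result Hash =
      llvm::MD5::hash(llvm::arrayRefFromStringRef(Selector));
  return {Hash.high(), Hash.low()};
}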