[mlgo] Support composite AOT-ed models #96276

Merged: 6 commits, Jun 24, 2024
96 changes: 82 additions & 14 deletions llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
@@ -14,40 +14,94 @@
 #ifndef LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
 #define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H

+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/MLModelRunner.h"
 #include "llvm/Analysis/TensorSpec.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"

 #include <memory>
 #include <vector>

 namespace llvm {

 /// ReleaseModeModelRunner - production mode implementation of the
 /// MLModelRunner. It uses an AOT-compiled SavedModel for efficient execution.
+struct EmbeddedModelRunnerOptions {
+  /// Feed and Fetch feature prefixes - i.e. a feature named "foo" will be
+  /// looked up as {FeedPrefix}foo, and an output named "bar" will be looked
+  /// up as {FetchPrefix}bar.
+  StringRef FeedPrefix = "feed_";
+  StringRef FetchPrefix = "fetch_";
+
+  /// ModelSelector is the name (recognized by the AOT-ed model) of a sub-model
+  /// to use. "" is allowed if the model doesn't support sub-models.
+  StringRef ModelSelector = "";
+
+  EmbeddedModelRunnerOptions &setFeedPrefix(StringRef Value) {
+    FeedPrefix = Value;
+    return *this;
+  }
+  EmbeddedModelRunnerOptions &setFetchPrefix(StringRef Value) {
+    FetchPrefix = Value;
+    return *this;
+  }
+  EmbeddedModelRunnerOptions &setModelSelector(StringRef Value) {
+    ModelSelector = Value;
+    return *this;
+  }
+};

 template <class TGen>
 class ReleaseModeModelRunner final : public MLModelRunner {
 public:
   /// FeatureNames' type should be an indexed collection of std::string, like
   /// std::array or std::vector, that has a size() method.
   template <class FType>
   ReleaseModeModelRunner(LLVMContext &Ctx, const FType &InputSpec,
-                         StringRef DecisionName, StringRef FeedPrefix = "feed_",
-                         StringRef FetchPrefix = "fetch_")
-      : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size()),
+                         StringRef DecisionName,
+                         const EmbeddedModelRunnerOptions &Options = {})
+      : MLModelRunner(Ctx, MLModelRunner::Kind::Release, InputSpec.size() + 1),
         CompiledModel(std::make_unique<TGen>()) {
     assert(CompiledModel && "The CompiledModel should be valid");

-    for (size_t I = 0; I < InputSpec.size(); ++I) {
-      const int Index =
-          CompiledModel->LookupArgIndex(FeedPrefix.str() + InputSpec[I].name());
-      void *Buffer = nullptr;
-      if (Index >= 0)
-        Buffer = CompiledModel->arg_data(Index);
-      setUpBufferForTensor(I, InputSpec[I], Buffer);
-    }
+    // Set up the model_selector past all the InputSpecs, in all cases.
+    //   - if the model doesn't have such a feature, but the user requested it,
+    //     we report an error. Same if the model supports it but the user
+    //     didn't specify it.
+    //   - finally, we compute the MD5 hash of the user input and set the value
+    //     of the model selector to {high, low}.
+    bool InputIsPresent = true;
+    populateTensor(InputSpec.size(),
+                   TensorSpec::createSpec<uint64_t>("_model_selector", {2}),
+                   Options.FeedPrefix, InputIsPresent);
+
+    // If we hit the "report an error" cases outlined above, continue with the
+    // set-up in case there's some custom diagnostics handler installed and it
+    // doesn't promptly exit.
+    if (Options.ModelSelector.empty() && InputIsPresent)
+      Ctx.emitError(
+          "A model selector was not specified but the underlying model "
+          "requires selecting one because it exposes a _model_selector input");
+    uint64_t High = 0;
+    uint64_t Low = 0;
+    if (!Options.ModelSelector.empty()) {
+      if (!InputIsPresent)
+        Ctx.emitError("A model selector was specified but the underlying model "
+                      "does not expose a _model_selector input");
+      const auto Hash = MD5::hash(arrayRefFromStringRef(Options.ModelSelector));
+      High = Hash.high();
+      Low = Hash.low();
+    }

-    ResultIndex = CompiledModel->LookupResultIndex(FetchPrefix.str() +
+    getTensor<uint64_t>(InputSpec.size())[0] = High;
+    getTensor<uint64_t>(InputSpec.size())[1] = Low;
+    // At this point, the model selector is set up. If the user didn't provide
+    // one, but the model has a _model_selector, it'll be set to (0, 0), which
+    // the composite model should treat as an error as part of its
+    // implementation (but that should only matter if there is a custom
+    // handler that doesn't exit on error).
+    for (size_t I = 0; I < InputSpec.size(); ++I)
+      populateTensor(I, InputSpec[I], Options.FeedPrefix, InputIsPresent);
+
+    ResultIndex = CompiledModel->LookupResultIndex(Options.FetchPrefix.str() +
                                                    DecisionName.str());
     assert(ResultIndex >= 0 && "Cannot find DecisionName in inlining model");
   }
@@ -59,6 +113,20 @@ class ReleaseModeModelRunner final : public MLModelRunner {
   }

 private:
+  // Fetch the model-provided buffer for the given Spec, or let MLModelRunner
+  // create a scratch buffer. Indicate back to the caller whether the model
+  // had that input in the first place.
+  void populateTensor(size_t Pos, const TensorSpec &Spec, StringRef Prefix,
+                      bool &InputIsPresent) {
+    const int Index =
+        CompiledModel->LookupArgIndex((Prefix + Spec.name()).str());
+    void *Buffer = nullptr;
+    InputIsPresent = Index >= 0;
+    if (InputIsPresent)
+      Buffer = CompiledModel->arg_data(Index);
+    setUpBufferForTensor(Pos, Spec, Buffer);
+  }
+
   void *evaluateUntyped() override {
     CompiledModel->Run();
     return CompiledModel->result_data(ResultIndex);
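As an illustration of the new options API, here is a minimal construction sketch. MyAOTModel is a hypothetical stand-in for an AOT-generated evaluator exposing the LookupArgIndex/LookupResultIndex/arg_data/result_data/Run interface the runner expects, and the "my-submodel" selector string is likewise made up:

#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/IR/LLVMContext.h"
#include <memory>
#include <vector>

using namespace llvm;

std::unique_ptr<MLModelRunner> makeRunner(LLVMContext &Ctx) {
  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1})};
  // Select a sub-model of a composite AOT-ed model; with a monolithic model,
  // the ModelSelector option would simply be left empty.
  return std::make_unique<ReleaseModeModelRunner<MyAOTModel>>(
      Ctx, Inputs, "decision",
      EmbeddedModelRunnerOptions().setModelSelector("my-submodel"));
}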
6 changes: 5 additions & 1 deletion llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -56,6 +56,9 @@ static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
         clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold,
                    "if-caller-not-cold", "if the caller is not cold")));

+static cl::opt<std::string> ModelSelector("ml-inliner-model-selector",
+                                          cl::Hidden, cl::init(""));
+
 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
 // codegen-ed file
 #include "InlinerSizeModel.h" // NOLINT
@@ -73,7 +76,8 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
   std::unique_ptr<MLModelRunner> AOTRunner;
   if (InteractiveChannelBaseName.empty())
     AOTRunner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
-        M.getContext(), FeatureMap, DecisionName);
+        M.getContext(), FeatureMap, DecisionName,
+        EmbeddedModelRunnerOptions().setModelSelector(ModelSelector));
   else {
     auto Features = FeatureMap;
     if (InteractiveIncludeDefault)
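A usage sketch for the new hidden flag. The invocation is illustrative only: the selector name and input file are made up, and it assumes an opt binary built with LLVM_HAVE_TF_AOT_INLINERSIZEMODEL and driven through the existing -enable-ml-inliner=release switch:

opt -passes='default<O2>' -enable-ml-inliner=release \
    -ml-inliner-model-selector=my-submodel \
    input.ll -S -o out.ll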
137 changes: 125 additions & 12 deletions llvm/unittests/Analysis/MLModelRunnerTest.cpp
@@ -7,10 +7,12 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/InteractiveModelRunner.h"
 #include "llvm/Analysis/NoInferenceModelRunner.h"
 #include "llvm/Analysis/ReleaseModeModelRunner.h"
 #include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/JSON.h"
@@ -28,28 +30,31 @@ namespace llvm {
 // This is a mock of the kind of AOT-generated model evaluator. It has 2 tensors
 // of shape {1}, and 'evaluation' adds them.
 // The interface is the one expected by ReleaseModelRunner.
-class MockAOTModel final {
+class MockAOTModelBase {
+protected:
   int64_t A = 0;
   int64_t B = 0;
   int64_t R = 0;

 public:
-  MockAOTModel() = default;
-  int LookupArgIndex(const std::string &Name) {
+  MockAOTModelBase() = default;
+  virtual ~MockAOTModelBase() = default;
+
+  virtual int LookupArgIndex(const std::string &Name) {
     if (Name == "prefix_a")
       return 0;
     if (Name == "prefix_b")
       return 1;
     return -1;
   }
   int LookupResultIndex(const std::string &) { return 0; }
-  void Run() { R = A + B; }
-  void *result_data(int RIndex) {
+  virtual void Run() = 0;
+  virtual void *result_data(int RIndex) {
     if (RIndex == 0)
       return &R;
     return nullptr;
   }
-  void *arg_data(int Index) {
+  virtual void *arg_data(int Index) {
     switch (Index) {
     case 0:
       return &A;
@@ -60,6 +65,64 @@ class MockAOTModel final {
     }
   }
 };
+
+class AdditionAOTModel final : public MockAOTModelBase {
+public:
+  AdditionAOTModel() = default;
+  void Run() override { R = A + B; }
+};
+
+class DiffAOTModel final : public MockAOTModelBase {
+public:
+  DiffAOTModel() = default;
+  void Run() override { R = A - B; }
+};
+
+static const char *M1Selector = "the model that subtracts";
+static const char *M2Selector = "the model that adds";
+
+static MD5::MD5Result Hash1 = MD5::hash(arrayRefFromStringRef(M1Selector));
+static MD5::MD5Result Hash2 = MD5::hash(arrayRefFromStringRef(M2Selector));
+
+class ComposedAOTModel final {
+  DiffAOTModel M1;
+  AdditionAOTModel M2;
+  uint64_t Selector[2] = {0};
+
+  bool isHashSameAsSelector(const std::pair<uint64_t, uint64_t> &Words) const {
+    return Selector[0] == Words.first && Selector[1] == Words.second;
+  }
+
+  MockAOTModelBase *getModel() {
+    if (isHashSameAsSelector(Hash1.words()))
+      return &M1;
+    if (isHashSameAsSelector(Hash2.words()))
+      return &M2;
+    llvm_unreachable("Should be one of the two");
+  }
+
+public:
+  ComposedAOTModel() = default;
+  int LookupArgIndex(const std::string &Name) {
+    if (Name == "prefix__model_selector")
+      return 2;
+    return getModel()->LookupArgIndex(Name);
+  }
+  int LookupResultIndex(const std::string &Name) {
+    return getModel()->LookupResultIndex(Name);
+  }
+  void *arg_data(int Index) {
+    if (Index == 2)
+      return Selector;
+    return getModel()->arg_data(Index);
+  }
+  void *result_data(int RIndex) { return getModel()->result_data(RIndex); }
+  void Run() { getModel()->Run(); }
+};
+
+static EmbeddedModelRunnerOptions makeOptions() {
+  EmbeddedModelRunnerOptions Opts;
+  Opts.setFeedPrefix("prefix_");
+  return Opts;
+}
 } // namespace llvm

 TEST(NoInferenceModelRunner, AccessTensors) {
@@ -86,8 +149,8 @@ TEST(ReleaseModeRunner, NormalUse) {
   LLVMContext Ctx;
   std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
                                  TensorSpec::createSpec<int64_t>("b", {1})};
-  auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
-      Ctx, Inputs, "", "prefix_");
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+      Ctx, Inputs, "", makeOptions());
   *Evaluator->getTensor<int64_t>(0) = 1;
   *Evaluator->getTensor<int64_t>(1) = 2;
   EXPECT_EQ(Evaluator->evaluate<int64_t>(), 3);
@@ -100,8 +163,8 @@ TEST(ReleaseModeRunner, ExtraFeatures) {
   std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
                                  TensorSpec::createSpec<int64_t>("b", {1}),
                                  TensorSpec::createSpec<int64_t>("c", {1})};
-  auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
-      Ctx, Inputs, "", "prefix_");
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+      Ctx, Inputs, "", makeOptions());
   *Evaluator->getTensor<int64_t>(0) = 1;
   *Evaluator->getTensor<int64_t>(1) = 2;
   *Evaluator->getTensor<int64_t>(2) = -3;
@@ -118,8 +181,8 @@ TEST(ReleaseModeRunner, ExtraFeaturesOutOfOrder) {
       TensorSpec::createSpec<int64_t>("c", {1}),
       TensorSpec::createSpec<int64_t>("b", {1}),
   };
-  auto Evaluator = std::make_unique<ReleaseModeModelRunner<MockAOTModel>>(
-      Ctx, Inputs, "", "prefix_");
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+      Ctx, Inputs, "", makeOptions());
   *Evaluator->getTensor<int64_t>(0) = 1;  // a
   *Evaluator->getTensor<int64_t>(1) = 2;  // c
   *Evaluator->getTensor<int64_t>(2) = -3; // b
@@ -129,6 +192,56 @@
   EXPECT_EQ(*Evaluator->getTensor<int64_t>(2), -3);
 }

+// We expect an error to be reported early if the user tries to specify a model
+// selector, but the model in fact doesn't support that.
+TEST(ReleaseModelRunner, ModelSelectorNoInputFeaturePresent) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  EXPECT_DEATH(std::make_unique<ReleaseModeModelRunner<AdditionAOTModel>>(
+                   Ctx, Inputs, "", makeOptions().setModelSelector(M2Selector)),
+               "A model selector was specified but the underlying model does "
+               "not expose a _model_selector input");
+}
+
+TEST(ReleaseModelRunner, ModelSelectorNoSelectorGiven) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  EXPECT_DEATH(
+      std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+          Ctx, Inputs, "", makeOptions()),
+      "A model selector was not specified but the underlying model requires "
+      "selecting one because it exposes a _model_selector input");
Collaborator: @mtrofin I'm getting build warnings on MSVC due to these EXPECT_DEATH-with-make_unique calls - any way that you can refactor to avoid them, please?

    E:\llvm\llvm-project\llvm\unittests\Analysis\MLModelRunnerTest.cpp(201): warning C4858: discarding return value: This function constructs an object wrapped by a smart pointer and has no other effects; it is not useful to call this function and discard the return value.

mtrofin (Member, Author): We can exclude these for MSVC. Is it urgent? If not, I can take care of it next week, if that's OK (holiday).

Collaborator: Not urgent; if it's OK with you, I'll just add (void), which seems to silence it?

mtrofin (Member, Author): By all means - thanks!
+}
+
+// Test that we correctly set up the _model_selector tensor value. We are only
+// responsible for what happens if the user doesn't specify a value (but the
+// model supports the feature), or if the user specifies one and we correctly
+// populate the tensor - and do so upfront (in case the model implementation
+// needs that for subsequent tensor buffer lookups).
+TEST(ReleaseModelRunner, ModelSelector) {
+  LLVMContext Ctx;
+  std::vector<TensorSpec> Inputs{TensorSpec::createSpec<int64_t>("a", {1}),
+                                 TensorSpec::createSpec<int64_t>("b", {1})};
+  // This explicitly asks for M1.
+  auto Evaluator = std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+      Ctx, Inputs, "", makeOptions().setModelSelector(M1Selector));
+  *Evaluator->getTensor<int64_t>(0) = 1;
+  *Evaluator->getTensor<int64_t>(1) = 2;
+  EXPECT_EQ(Evaluator->evaluate<int64_t>(), -1);
+
+  // Ask for M2.
+  Evaluator = std::make_unique<ReleaseModeModelRunner<ComposedAOTModel>>(
+      Ctx, Inputs, "", makeOptions().setModelSelector(M2Selector));
+  *Evaluator->getTensor<int64_t>(0) = 1;
+  *Evaluator->getTensor<int64_t>(1) = 2;
+  EXPECT_EQ(Evaluator->evaluate<int64_t>(), 3);
+
+  // Asking for a model that's not supported isn't handled by our infra, and we
+  // expect the model implementation to fail at some point.
+}

 #if defined(LLVM_ON_UNIX)
 TEST(InteractiveModelRunner, Evaluation) {
   LLVMContext Ctx;
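To make the selector hashing concrete, here is a small self-contained sketch restating the set-up logic from ReleaseModeModelRunner.h above; the helper name selectorWords is ours, not LLVM's. It maps a selector string to the two uint64_t words that get written into the _model_selector tensor:

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include <cstdint>
#include <utility>

// The MD5 hash of the selector string is split into {high, low} words; a
// composite model compares these against the hashes of the sub-model names
// it knows about (see ComposedAOTModel::getModel in the test above).
std::pair<uint64_t, uint64_t> selectorWords(llvm::StringRef Selector) {
  const llvm::MD5::MD5Result Hash =
      llvm::MD5::hash(llvm::arrayRefFromStringRef(Selector));
  return {Hash.high(), Hash.low()};
}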