Skip to content

Allow single EValue to be passed to Module execute. #4907

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/source/extension-module.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Tensor::SizesType sizes[] = {1, 3, 256, 256};
TensorImpl tensor(ScalarType::Float, std::size(sizes), sizes, input);

// Perform an inference.
const auto result = module.forward({EValue(Tensor(&tensor))});
const auto result = module.forward(Tensor(&tensor));

// Check for success or failure.
if (result.ok()) {
Expand Down Expand Up @@ -105,13 +105,13 @@ Note: `method_meta()` will try to force-load the `Method` when called for the fi
Assuming that the `Program`'s method names and their input format is known ahead of time, we rarely need to query for those and can run the methods directly by name using the `execute()` function:

```cpp
const auto result = module.execute("forward", {EValue(Tensor(&tensor))});
const auto result = module.execute("forward", Tensor(&tensor));
```

Which can also be simplified for the standard `forward()` method name as:

```cpp
const auto result = module.forward({EValue(Tensor(&tensor))});
const auto result = module.forward(Tensor(&tensor));
```

Note: `execute()` or `forward()` will try to force load the `Program` and the `Method` when called for the first time. Therefore, the first inference will take more time than subsequent ones as it loads the model lazily and prepares it for execution unless the `Program` or `Method` was loaded explicitly earlier using the corresponding functions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ - (BOOL)classifyWithInput:(float*)input
error:(NSError**)error {
int32_t sizes[] = {1, kChannels, kSize, kSize};
TensorImpl inputTensor(ScalarType::Float, std::size(sizes), sizes, input);
const auto result = _module->forward({EValue(Tensor(&inputTensor))});
const auto result = _module->forward(Tensor(&inputTensor));

if (!result.ok()) {
if (error) {
Expand Down
2 changes: 1 addition & 1 deletion examples/models/llava/runner/llava_image_prefiller.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class LlavaImagePrefiller : public ImagePrefiller {
image.data.data(), {3, image.height, image.width}, ScalarType::Byte);
// Run image encoder
std::vector<EValue> image_encoder_outputs = ET_UNWRAP(module_->execute(
kImageEncoderMethod, {managed_images.get_aliasing_tensor()}));
kImageEncoderMethod, managed_images.get_aliasing_tensor()));

// inputs:[start_pos, embeds]
ManagedTensor managed_start_pos(&start_pos, {1}, ScalarType::Long);
Expand Down
2 changes: 1 addition & 1 deletion examples/models/llava/runner/llava_text_decoder_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class LlavaTextDecoderRunner : public TextDecoderRunner {

// run token embedding
std::vector<EValue> token_embedding_outputs =
ET_UNWRAP(module_->execute(kTokenEmbeddingMethod, {tokens}));
ET_UNWRAP(module_->execute(kTokenEmbeddingMethod, tokens));

// run text model
std::vector<EValue> outputs_res = ET_UNWRAP(module_->execute(
Expand Down
2 changes: 1 addition & 1 deletion extension/llm/runner/text_decoder_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ ::executorch::runtime::Result<exec_aten::Tensor> TextDecoderRunner::step(
(void)managed_start_pos; // unused

::executorch::runtime::Result<std::vector<::executorch::runtime::EValue>>
outputs_res = module_->forward({tokens});
outputs_res = module_->forward(tokens);
ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());
ET_CHECK_MSG(
outputs_res.get().size() == 1,
Expand Down
57 changes: 54 additions & 3 deletions extension/module/module.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,25 @@ class Module final {
const std::string& method_name,
const std::vector<::executorch::runtime::EValue>& input);

/**
 * Execute a specific method, passing it exactly one input value.
 * Lazily loads the program and the method first if they are not loaded yet.
 *
 * @param[in] method_name Name of the method to run.
 * @param[in] input The single value handed to the method.
 *
 * @returns A Result holding the method's output values on success, or an
 * error describing why execution failed.
 */
ET_NODISCARD
::executorch::runtime::Result<std::vector<::executorch::runtime::EValue>>
execute(
    const std::string& method_name,
    const ::executorch::runtime::EValue& input) {
  // Wrap the single value and delegate to the vector-based overload.
  std::vector<::executorch::runtime::EValue> inputs{input};
  return execute(method_name, inputs);
}

/**
* Execute a specific method without any input values.
* Loads the program and method before executing if needed.
Expand All @@ -193,7 +212,7 @@ class Module final {
ET_NODISCARD
::executorch::runtime::Result<std::vector<::executorch::runtime::EValue>>
execute(const std::string& method_name) {
  // No inputs: delegate to the vector overload with an explicitly empty list
  // (a named vector avoids any ambiguity a braced `{}` argument could cause).
  const std::vector<::executorch::runtime::EValue> no_inputs;
  return execute(method_name, no_inputs);
}

/**
Expand All @@ -217,6 +236,23 @@ class Module final {
return result[0];
}

/**
 * Run a specific method with exactly one input value and return its first
 * output. Lazily loads the program and the method beforehand if needed.
 *
 * @param[in] method_name Name of the method to run.
 * @param[in] input The single value handed to the method.
 *
 * @returns A Result holding the method's first output value on success, or
 * an error describing why execution failed.
 */
ET_NODISCARD
::executorch::runtime::Result<::executorch::runtime::EValue> get(
    const std::string& method_name,
    const ::executorch::runtime::EValue& input) {
  // Wrap the single value and delegate to the vector-based overload.
  std::vector<::executorch::runtime::EValue> inputs{input};
  return get(method_name, inputs);
}

/**
* Retrieve the output value of a specific method without any input values.
* Loads the program and method before execution if needed.
Expand All @@ -229,7 +265,7 @@ class Module final {
ET_NODISCARD
::executorch::runtime::Result<::executorch::runtime::EValue> get(
    const std::string& method_name) {
  // No inputs: delegate to the vector overload with an explicitly empty list.
  const std::vector<::executorch::runtime::EValue> no_inputs;
  return get(method_name, no_inputs);
}

/**
Expand All @@ -247,6 +283,21 @@ class Module final {
return execute("forward", input);
}

/**
 * Run the 'forward' method, passing it exactly one input value.
 * Lazily loads the program and the method first if they are not loaded yet.
 *
 * @param[in] input The single value handed to 'forward'.
 *
 * @returns A Result holding the output values of 'forward' on success, or
 * an error describing why execution failed.
 */
ET_NODISCARD
::executorch::runtime::Result<std::vector<::executorch::runtime::EValue>>
forward(const ::executorch::runtime::EValue& input) {
  // Wrap the single value and delegate to the vector-based overload.
  std::vector<::executorch::runtime::EValue> inputs{input};
  return forward(inputs);
}

/**
* Execute the 'forward' method without any input values.
* Loads the program and method before executing if needed.
Expand All @@ -257,7 +308,7 @@ class Module final {
ET_NODISCARD
::executorch::runtime::Result<std::vector<::executorch::runtime::EValue>>
forward() {
  // No inputs: delegate to the vector overload with an explicitly empty list.
  const std::vector<::executorch::runtime::EValue> no_inputs;
  return forward(no_inputs);
}

/**
Expand Down
26 changes: 13 additions & 13 deletions extension/module/test/module_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ TEST_F(ModuleTest, TestExecute) {
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), input.data());

const auto result = module.execute("forward", {EValue(Tensor(&tensor))});
const auto result = module.execute("forward", Tensor(&tensor));
EXPECT_TRUE(result.ok());
EXPECT_TRUE(module.is_loaded());
EXPECT_TRUE(module.is_method_loaded("forward"));
Expand All @@ -150,7 +150,7 @@ TEST_F(ModuleTest, TestExecutePreload) {
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), input.data());

const auto result = module.execute("forward", {EValue(Tensor(&tensor))});
const auto result = module.execute("forward", Tensor(&tensor));
EXPECT_TRUE(result.ok());

const auto data = result->at(0).toTensor().const_data_ptr<float>();
Expand All @@ -169,7 +169,7 @@ TEST_F(ModuleTest, TestExecutePreload_method) {
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), input.data());

const auto result = module.execute("forward", {EValue(Tensor(&tensor))});
const auto result = module.execute("forward", Tensor(&tensor));
EXPECT_TRUE(result.ok());

const auto data = result->at(0).toTensor().const_data_ptr<float>();
Expand All @@ -191,7 +191,7 @@ TEST_F(ModuleTest, TestExecutePreloadProgramAndMethod) {
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), input.data());

const auto result = module.execute("forward", {EValue(Tensor(&tensor))});
const auto result = module.execute("forward", Tensor(&tensor));
EXPECT_TRUE(result.ok());

const auto data = result->at(0).toTensor().const_data_ptr<float>();
Expand Down Expand Up @@ -223,7 +223,7 @@ TEST_F(ModuleTest, TestGet) {
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), input.data());

const auto result = module.get("forward", {EValue(Tensor(&tensor))});
const auto result = module.get("forward", Tensor(&tensor));

EXPECT_TRUE(result.ok());
const auto data = result->toTensor().const_data_ptr<float>();
Expand All @@ -237,7 +237,7 @@ TEST_F(ModuleTest, TestForward) {
std::array<int32_t, 2> sizes{1, 2};
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), input.data());
const auto result = module->forward({EValue(Tensor(&tensor))});
const auto result = module->forward(Tensor(&tensor));
EXPECT_TRUE(result.ok());

const auto data = result->at(0).toTensor().const_data_ptr<float>();
Expand All @@ -247,7 +247,7 @@ TEST_F(ModuleTest, TestForward) {
std::array<float, 2> input2{2, 3};
TensorImpl tensor2(
ScalarType::Float, sizes.size(), sizes.data(), input2.data());
const auto result2 = module->forward({EValue(Tensor(&tensor2))});
const auto result2 = module->forward(Tensor(&tensor2));
EXPECT_TRUE(result2.ok());

const auto data2 = result->at(0).toTensor().const_data_ptr<float>();
Expand All @@ -258,7 +258,7 @@ TEST_F(ModuleTest, TestForward) {
// Verifies the single-EValue forward() overload rejects an invalid input:
// a default-constructed EValue carries no payload, so the run is expected
// to fail (result.ok() == false) rather than crash.
TEST_F(ModuleTest, TestForwardWithInvalidInputs) {
  Module module(model_path_);

  // Default EValue — exercises the new forward(const EValue&) overload.
  const auto result = module.forward(EValue());

  EXPECT_FALSE(result.ok());
}
Expand Down Expand Up @@ -308,18 +308,18 @@ TEST_F(ModuleTest, TestProgramSharingAndDataLoaderManagement) {
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), input.data());

auto result1 = module1->execute("forward", {EValue(Tensor(&tensor))});
auto result1 = module1->execute("forward", Tensor(&tensor));
EXPECT_TRUE(result1.ok());

auto module2 = std::make_unique<Module>(module1->program());

auto result2 = module2->execute("forward", {EValue(Tensor(&tensor))});
auto result2 = module2->execute("forward", Tensor(&tensor));
EXPECT_TRUE(result2.ok());

module1 = std::make_unique<Module>("/path/to/nonexistent/file.pte");
EXPECT_FALSE(module1->is_loaded());

auto result3 = module2->execute("forward", {EValue(Tensor(&tensor))});
auto result3 = module2->execute("forward", Tensor(&tensor));
EXPECT_TRUE(result3.ok());
}

Expand Down Expand Up @@ -356,7 +356,7 @@ TEST_F(ModuleTest, TestProgramPersistenceAndReuseAfterModuleDestruction) {
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), input.data());

auto result = module.execute("forward", {EValue(Tensor(&tensor))});
auto result = module.execute("forward", Tensor(&tensor));
EXPECT_TRUE(result.ok());

auto data = result->at(0).toTensor().const_data_ptr<float>();
Expand Down Expand Up @@ -385,7 +385,7 @@ TEST_F(ModuleTest, TestConcurrentExecutionWithSharedProgram) {
TensorImpl tensor(
ScalarType::Float, sizes.size(), sizes.data(), (void*)input.data());

const auto result = module.forward({EValue(Tensor(&tensor))});
const auto result = module.forward(Tensor(&tensor));
EXPECT_TRUE(result.ok());

const auto data = result->at(0).toTensor().const_data_ptr<float>();
Expand Down
Loading