
Commit 70743bb

helunwencser authored and facebook-github-bot committed
add phi-3-mini runner (#3951)

Summary:
This PR adds a basic runner for running the phi-3-mini model. It uses sentencepiece to create the tokenizer.

Commands for running the model:
```
# setup executorch per instructions in https://pytorch.org/executorch/stable/getting-started-setup.html
# install latest transformers
pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers
# export the model, will take a few minutes
cd examples/models/phi-3-mini
python export_model.py
# download the tokenizer.model
wget -O tokenizer.model https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true
# build the runner
mkdir cmake-out
cd cmake-out
cmake ..
cd ..
cmake --build cmake-out -j10
./cmake-out/phi_3_mini_runner
```

Pull Request resolved: #3951
Reviewed By: larryliu0820
Differential Revision: D58477481
Pulled By: helunwencser
fbshipit-source-id: c5a7e6781338d4347a1b9d06b22e23613633df6b
1 parent 4ed5bc7 commit 70743bb

File tree

6 files changed: +204 -0 lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
```
@@ -64,3 +64,6 @@
 [submodule "third-party/ios-cmake"]
 	path = third-party/ios-cmake
 	url = https://github.com/leetal/ios-cmake
+[submodule "examples/models/phi-3-mini/third-party/sentencepiece"]
+	path = examples/models/phi-3-mini/third-party/sentencepiece
+	url = https://github.com/google/sentencepiece.git
```
examples/models/phi-3-mini/CMakeLists.txt

Lines changed: 39 additions & 0 deletions

```
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.19)
project(phi_3_mini_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)
set(CMAKE_BUILD_TYPE Release)

# Set options for executorch build.
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "" ON)
option(EXECUTORCH_BUILD_XNNPACK "" ON)

add_subdirectory(
    ${CMAKE_CURRENT_SOURCE_DIR}/../../..
    ${CMAKE_BINARY_DIR}/../../..)
add_subdirectory(
    ${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece
    ${CMAKE_BINARY_DIR}/third-party/sentencepiece)

add_executable(phi_3_mini_runner main.cpp)
target_include_directories(
    phi_3_mini_runner
    PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece/src)
target_link_libraries(
    phi_3_mini_runner
    PRIVATE
    executorch
    extension_module_static
    optimized_native_cpu_ops_lib
    xnnpack_backend
    sentencepiece)
```

examples/models/phi-3-mini/README.md

Lines changed: 28 additions & 0 deletions
# Summary
This example demonstrates how to run a [Phi-3-mini](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) 3.8B model via ExecuTorch. We use XNNPACK to accelerate performance, together with XNNPACK symmetric per-channel quantization.

# Instructions
## Step 1: Setup
1. Follow the [tutorial](https://pytorch.org/executorch/main/getting-started-setup) to set up ExecuTorch. For installation, run `./install_requirements.sh --pybind xnnpack`.
2. Phi-3 Mini-128K-Instruct has been integrated into the development version (4.41.0.dev0) of transformers. Make sure you install transformers version 4.41.0 or later: `pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers`

## Step 2: Prepare and run the model
1. Download `tokenizer.model` from HuggingFace:
```
cd examples/models/phi-3-mini
wget -O tokenizer.model https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true
```
2. Export the model. This step will take a few minutes to finish:
```
python export_model.py
```
3. Build and run the runner:
```
mkdir cmake-out
cd cmake-out
cmake ..
cd ..
cmake --build cmake-out -j10
./cmake-out/phi_3_mini_runner
```

examples/models/phi-3-mini/main.cpp

Lines changed: 90 additions & 0 deletions
```cpp
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

// main.cpp

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

#include <executorch/extension/module/module.h>
#include <executorch/extension/runner_util/managed_tensor.h>

#include "sentence_piece_tokenizer.h"

using namespace torch::executor;

// The value of the phi-3-mini `<|endoftext|>` token.
#define ENDOFTEXT_TOKEN 32000
#define VOCABULARY_SIZE 32064

// TODO(lunwenh): refactor and share with llama
void generate(
    Module& llm_model,
    std::string& prompt,
    SentencePieceTokenizer& tokenizer,
    size_t max_output_length) {
  // Convert the input text into a list of integers (tokens) that represents
  // it, using the string-to-token mapping that the model was trained on.
  // Each token is an integer that represents a word or part of a word.
  std::vector<int64_t> input_tokens = tokenizer.encode(prompt);

  std::cout << "Generating tokens ..." << std::endl;

  std::vector<int64_t> output_tokens;

  for (size_t i = 0; i < max_output_length; i++) {
    ManagedTensor tensor_tokens(
        input_tokens.data(),
        {1, static_cast<int>(input_tokens.size())},
        ScalarType::Long);
    std::vector<EValue> inputs = {tensor_tokens.get_aliasing_tensor()};

    Result<std::vector<EValue>> result_evalue = llm_model.forward(inputs);

    const auto error = result_evalue.error();
    Tensor logits_tensor = result_evalue.get()[0].toTensor();
    const auto sentence_length = logits_tensor.size(1);
    std::vector<float> logits(
        logits_tensor.data_ptr<float>() +
            (sentence_length - 1) * VOCABULARY_SIZE,
        logits_tensor.data_ptr<float>() + sentence_length * VOCABULARY_SIZE);

    // Sample the next token from the logits.
    int64_t next_token =
        std::max_element(logits.begin(), logits.end()) - logits.begin();

    std::cout << next_token << "\t";
    std::cout.flush();

    // Break if we reached the end of the text.
    if (next_token == ENDOFTEXT_TOKEN) {
      break;
    }

    output_tokens.push_back(next_token);

    // Update next input.
    input_tokens.push_back(next_token);
  }

  std::cout << std::endl;
  std::cout << tokenizer.decode(output_tokens) << std::endl;
}

int main() {
  // Set up the prompt. This provides the seed text for the model to elaborate.
  std::cout << "Enter model prompt: ";
  std::string prompt;
  std::getline(std::cin, prompt);

  SentencePieceTokenizer tokenizer("tokenizer.model");

  Module model("phi-3-mini.pte", Module::MlockConfig::UseMlockIgnoreErrors);

  const auto max_output_tokens = 128;
  generate(model, prompt, tokenizer, max_output_tokens);
}
```
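Note on the loop above: the runner has no KV cache yet, so each iteration re-feeds the entire token sequence and `forward()` returns one row of logits per input position; only the last row predicts the next token. A minimal sketch of that indexing and the greedy argmax, not part of this PR, with the hypothetical helper name `greedy_next_token`:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>

// Pick the highest-scoring token from the last row of a flattened
// [1, seq_len, vocab_size] logits buffer, mirroring the arithmetic
// in generate() above.
int64_t greedy_next_token(const float* logits, size_t seq_len, size_t vocab_size) {
  const float* last_row = logits + (seq_len - 1) * vocab_size;
  return std::max_element(last_row, last_row + vocab_size) - last_row;
}
```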
examples/models/phi-3-mini/sentence_piece_tokenizer.h

Lines changed: 43 additions & 0 deletions

```cpp
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <sentencepiece_processor.h>

// TODO(lunwenh): Add unit tests
class SentencePieceTokenizer {
 public:
  SentencePieceTokenizer(const std::string& filePath) {
    const auto status = processor_.Load(filePath);
    if (!status.ok()) {
      std::ostringstream errorMessageStream;
      errorMessageStream << "Failed to load SentencePiece model from "
                         << filePath << " with error " << status.ToString();
      throw std::runtime_error(errorMessageStream.str());
    }
    // Prepend the BOS token to every encoded sequence.
    processor_.SetEncodeExtraOptions("bos");
  }

  std::vector<int64_t> encode(const std::string& piece) {
    std::vector<int> ids;
    processor_.Encode(piece, &ids);
    std::vector<int64_t> idsLong(ids.begin(), ids.end());
    return idsLong;
  }

  std::string decode(const std::vector<int64_t>& ids) {
    std::vector<int> idsInt(ids.begin(), ids.end());
    std::string piece;
    processor_.Decode(idsInt, &piece);
    return piece;
  }

 private:
  sentencepiece::SentencePieceProcessor processor_;
};
```
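For reference, a minimal usage sketch for the wrapper above, not part of this PR; it assumes the `tokenizer.model` downloaded in the README sits in the working directory:

```cpp
#include <iostream>

#include "sentence_piece_tokenizer.h"

int main() {
  SentencePieceTokenizer tokenizer("tokenizer.model");

  // encode() prepends BOS (SetEncodeExtraOptions("bos") above), so the
  // id count is one more than the raw subword count.
  std::vector<int64_t> ids = tokenizer.encode("Hello, world!");
  std::cout << "token count: " << ids.size() << std::endl;

  // Decoding skips control tokens, round-tripping the original text.
  std::cout << tokenizer.decode(ids) << std::endl;
  return 0;
}
```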
