
Commit 17d39e4

helunwencser authored and facebook-github-bot committed
Remove logic for appending or prepending tokens (#4920)
Summary:
Pull Request resolved: #4920

The runner should not prepend or append any tokens to the prompt. Users should make sure that the prompt is properly formatted according to the model's chat template. This PR removes the logic for appending or prepending tokens.

Test Plan: CI

imported-using-ghimport

Differential Revision: D61824091

Pulled By: helunwencser
1 parent 0a11e99 · commit 17d39e4
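Since the runner no longer inserts BOS or EOS tokens, the caller must spell out the full chat-template markup in the prompt string. Below is a minimal, non-authoritative Python sketch of that formatting for the Llama 3 Instruct template; the helper name `format_llama3_prompt` is illustrative and not part of this PR, and the sketch assumes the tokenizer maps these special-token strings to their token IDs.

```python
# Illustrative sketch (not part of this PR): with the runner no longer
# adding BOS/EOS, the caller supplies the full chat-template markup.
# Shown for the Llama 3 Instruct template; other models use different templates.
def format_llama3_prompt(user_message: str, system_message: str = "") -> str:
    # <|begin_of_text|> is the BOS marker (ID 128000 in Llama 3).
    prompt = "<|begin_of_text|>"
    if system_message:
        prompt += (
            "<|start_header_id|>system<|end_header_id|>\n\n"
            f"{system_message}<|eot_id|>"
        )
    prompt += (
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"{user_message}<|eot_id|>"
        # Leave the assistant header open so the model generates the reply.
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    return prompt
```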

4 files changed: +4 −15 lines

examples/models/llama2/README.md
2 additions, 2 deletions

@@ -146,7 +146,7 @@ python -m examples.models.llama2.export_llama \
     --use_sdpa_with_kv_cache \
     -X \
     -d bf16 \
-    --metadata '{"append_eos_to_prompt": 0, "get_bos_id":128000, "get_eos_ids":[128009, 128001], "get_n_bos": 0, "get_n_eos": 0}' \
+    --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
     --output_name="llama3_2.pte"
 ```

@@ -172,7 +172,7 @@ python -m examples.models.llama2.export_llama \
     -d fp32 \
     --preq_embedding_quantize 8,0 \
     --use_spin_quant native \
-    --metadata '{"append_eos_to_prompt": 0, "get_bos_id":128000, "get_eos_ids":[128009, 128001], "get_n_bos": 0, "get_n_eos": 0}'
+    --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
 ```

 ### Option B: Download and export Llama 3 8B instruct model
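With the append/prepend counts gone, the `--metadata` JSON shrinks to the two keys the runtime still needs: which ID marks beginning-of-sequence and which IDs may terminate generation. A small sketch of reading that JSON, using the values from the commands above (the ID-to-token comments are the standard Llama 3 assignments):

```python
import json

# Sketch: the trimmed metadata only declares token IDs; the per-prompt
# counts (get_n_bos/get_n_eos) are gone because the runner no longer
# inserts tokens itself.
metadata = json.loads('{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}')
assert metadata["get_bos_id"] == 128000             # <|begin_of_text|>
assert metadata["get_eos_ids"] == [128009, 128001]  # <|eot_id|>, <|end_of_text|>
```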

examples/models/llama2/export_llama_lib.py
0 additions, 3 deletions

@@ -723,12 +723,9 @@ def _load_llama_model_metadata(
 ):
     is_fairseq2 = weight_type == WeightType.FAIRSEQ2
     metadata = {
-        "append_eos_to_prompt": is_fairseq2,  # For language llama, tell the runtime to always append EOS token(s) to prompt.
         "get_bos_id": 3 if is_fairseq2 else 1,
         "get_eos_ids": [3] if is_fairseq2 else [2],
         "get_max_seq_len": model_args.max_seq_len,
-        "get_n_bos": 1,
-        "get_n_eos": 2 if is_fairseq2 else 1,
         "get_n_layers": model_args.n_layers,
         "get_vocab_size": model_args.vocab_size,
         "use_kv_cache": use_kv_cache,

examples/models/llama2/runner/runner.cpp
2 additions, 8 deletions

@@ -27,13 +27,10 @@ using ::executorch::runtime::Result;
 namespace llm = ::executorch::extension::llm;

 namespace {
-static constexpr auto kAppendEosToPrompt = "append_eos_to_prompt";
 static constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
 static constexpr auto kBosId = "get_bos_id";
 static constexpr auto kEosIds = "get_eos_ids";
 static constexpr auto kMaxSeqLen = "get_max_seq_len";
-static constexpr auto kNBos = "get_n_bos";
-static constexpr auto kNEos = "get_n_eos";
 static constexpr auto kVocabSize = "get_vocab_size";
 static constexpr auto kUseKVCache = "use_kv_cache";
 static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";

@@ -50,11 +47,8 @@ Runner::Runner(
       module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
       tokenizer_path_(tokenizer_path),
       metadata_({
-          {kAppendEosToPrompt, false},
           {kEnableDynamicShape, false},
           {kMaxSeqLen, 128},
-          {kNBos, 1},
-          {kNEos, 1},
           {kUseKVCache, true},
           {kUseSDPAWithKVCache, false},
       }) {

@@ -203,8 +197,8 @@ Error Runner::generate(

   Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
       prompt,
-      metadata_.at(kNBos),
-      metadata_.at(kAppendEosToPrompt) ? metadata_.at(kNEos) : 0);
+      /* bos */ 0,
+      /* eos */ 0);

   ET_CHECK_OK_OR_RETURN_ERROR(
       encode_res.error(), "Failed to encode prompt %s", prompt.c_str());
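The behavioral core of the change is this `tokenizer_->encode` call: the BOS count and the conditional EOS count are both replaced with hard-coded zeros. A Python sketch of the equivalent behavior, assuming a tokenizer with an `encode(text, bos, eos)` interface like Meta's reference Llama tokenizer (an assumption for illustration, not ExecuTorch's actual Python API):

```python
# Sketch (assumed tokenizer interface, not ExecuTorch code): encode the
# prompt without inserting BOS or EOS. Any special tokens the model
# expects must already be spelled out in `prompt` by the chat template.
def encode_prompt(tokenizer, prompt: str) -> list[int]:
    return tokenizer.encode(prompt, bos=False, eos=False)
```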

extension/export_util/export_hf_model.py
0 additions, 2 deletions

@@ -85,8 +85,6 @@ def _get_constant_methods(model: PreTrainedModel):
         "get_head_dim": model.config.hidden_size / model.config.num_attention_heads,
         "get_max_batch_size": model.generation_config.cache_config.batch_size,
         "get_max_seq_len": model.generation_config.cache_config.max_cache_len,
-        "get_n_bos": 1,
-        "get_n_eos": 1,
         "get_n_kv_heads": model.config.num_key_value_heads,
         "get_n_layers": model.config.num_hidden_layers,
         "get_vocab_size": model.config.vocab_size,
