Skip to content

Commit b1c3713

Browse files
committed
Remove logic for appending or prepending tokens
ghstack-source-id: faf0542 ghstack-comment-id: 2311228673 Pull Request resolved: #4920
1 parent b284866 commit b1c3713

File tree

2 files changed

+2
-11
lines changed

2 files changed

+2
-11
lines changed

examples/models/llama2/export_llama_lib.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -569,12 +569,9 @@ def _load_llama_model_metadata(
569569
):
570570
is_fairseq2 = weight_type == WeightType.FAIRSEQ2
571571
metadata = {
572-
"append_eos_to_prompt": is_fairseq2, # For language llama, tell the runtime to always append EOS token(s) to prompt.
573572
"get_bos_id": 3 if is_fairseq2 else 1,
574573
"get_eos_ids": [3] if is_fairseq2 else [2],
575574
"get_max_seq_len": model_args.max_seq_len,
576-
"get_n_bos": 1,
577-
"get_n_eos": 2 if is_fairseq2 else 1,
578575
"get_vocab_size": model_args.vocab_size,
579576
"use_kv_cache": use_kv_cache,
580577
"use_sdpa_with_kv_cache": use_sdpa_with_kv_cache,

examples/models/llama2/runner/runner.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,10 @@
2424

2525
namespace torch::executor {
2626
namespace {
27-
static constexpr auto kAppendEosToPrompt = "append_eos_to_prompt";
2827
static constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
2928
static constexpr auto kBosId = "get_bos_id";
3029
static constexpr auto kEosIds = "get_eos_ids";
3130
static constexpr auto kMaxSeqLen = "get_max_seq_len";
32-
static constexpr auto kNBos = "get_n_bos";
33-
static constexpr auto kNEos = "get_n_eos";
3431
static constexpr auto kVocabSize = "get_vocab_size";
3532
static constexpr auto kUseKVCache = "use_kv_cache";
3633
static constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
@@ -54,11 +51,8 @@ Runner::Runner(
5451
#endif
5552
),
5653
metadata_({
57-
{kAppendEosToPrompt, false},
5854
{kEnableDynamicShape, false},
5955
{kMaxSeqLen, 128},
60-
{kNBos, 1},
61-
{kNEos, 1},
6256
{kUseKVCache, true},
6357
{kUseSDPAWithKVCache, false},
6458
}) {
@@ -174,8 +168,8 @@ Error Runner::generate(
174168

175169
Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
176170
prompt,
177-
metadata_.at(kNBos),
178-
metadata_.at(kAppendEosToPrompt) ? metadata_.at(kNEos) : 0);
171+
/* bos */ 0,
172+
/* eos */ 0);
179173

180174
ET_CHECK_OK_OR_RETURN_ERROR(
181175
encode_res.error(), "Failed to encode prompt %s", prompt.c_str());

0 commit comments

Comments (0)