Skip to content

Commit 0cff7c9

Browse files
author
Joey Tsai
committed
Rebase and minor fix
- Fix rebase conflict
- Change input dtype of calibration function
1 parent b7061d7 commit 0cff7c9

File tree

3 files changed

+3
-5
lines changed

3 files changed

+3
-5
lines changed

examples/qualcomm/oss_scripts/llama2/llama.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def sample_top_p(probs: torch.Tensor, top_p: float) -> torch.Tensor:
203203
return probs_indices.gather(dim=-1, index=next_token)
204204

205205
with torch.no_grad():
206-
while token_list[-1] != sp_model.eos_id() and pos < max_seq_len:
206+
while token_list[-1] != sp_model.eos_id() and pos < max_seq_len - 1:
207207
logits, new_k_caches, new_v_caches = module(
208208
torch.full((1, 1), token_list[pos]),
209209
atten_mask,
@@ -248,7 +248,7 @@ def _bert_calibrate(
248248
token_list = torch.cat(
249249
[
250250
token_list,
251-
torch.zeros((1, max_cache_len - last_prompt_pos), dtype=torch.int64),
251+
torch.zeros((1, max_cache_len - last_prompt_pos), dtype=torch.int32),
252252
],
253253
dim=1,
254254
)

examples/qualcomm/oss_scripts/llama3_2/llama.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def _bert_calibrate(
122122
token_list = torch.cat(
123123
[
124124
token_list,
125-
torch.zeros((1, max_cache_len - last_prompt_pos), dtype=torch.int64),
125+
torch.zeros((1, max_cache_len - last_prompt_pos), dtype=torch.int32),
126126
],
127127
dim=1,
128128
)

examples/qualcomm/oss_scripts/llama3_2/runner/runner.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@ Runner::Runner(
4545
const int eval_mode)
4646
: n_bos_(1),
4747
n_eos_(1),
48-
vocab_size_(QNN_LLAMA3_2_LOGITS),
49-
max_seq_len_(QNN_LLAMA3_2_SEQLEN),
5048
tokenizer_path_(tokenizer_path),
5149
temperature_(temperature),
5250
eval_mode_(eval_mode),

0 commit comments

Comments (0)