2 files changed (+6, −2 lines), both under examples/models/llama/runner.

First file:

```diff
@@ -91,10 +91,11 @@ def main() -> None:
         else runner.text_completion(
             prompt=args.prompt,
             temperature=args.temperature,
+            echo=True,
         )
     )
     if args.show_tokens:
-        print(f"Tokens: {generated_tokens}")
+        print(f"Generated {len(generated_tokens)} tokens: {generated_tokens}")


 if __name__ == "__main__":
```
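For context, a hedged before/after of the `--show_tokens` output; the token ids below are invented for illustration. The old message printed a bare list; the new one prefixes the count, which (with `echo=True` now set) includes the prompt tokens, assuming `echo` follows the usual Llama-runner convention of echoing the prompt in the returned sequence:

```python
# Made-up token ids standing in for a real completion with echo=True,
# so the prompt tokens sit at the front of the list:
generated_tokens = [1, 15043, 29892, 3186, 2]
print(f"Generated {len(generated_tokens)} tokens: {generated_tokens}")
# Output: Generated 5 tokens: [1, 15043, 29892, 3186, 2]
```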
Second file (also under examples/models/llama/runner):

```diff
@@ -64,6 +64,7 @@ def forward(
     def generate(  # noqa: C901
         self,
         prompt_tokens: List[int],
+        max_seq_len: int,
         temperature: float = 0.8,
         top_p: float = 0.9,
         echo: bool = False,
@@ -83,7 +84,7 @@ def generate(  # noqa: C901
             print(f"{self.tokenizer.decode_token(current_token)}", end="", flush=True)
         tokens = prompt_tokens + [current_token]

-        while len(tokens) < self.params.max_seq_len:
+        while len(tokens) < max_seq_len:
             if self.params.use_kv_cache:
                 logits = self.forward(
                     tokens=torch.tensor(
```
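A minimal sketch of the new contract, with a toy sampler standing in for the model; everything here is illustrative, and only the signature shape and the loop bound mirror the diff. The point is that the loop now stops when the combined prompt-plus-generated length reaches the caller-supplied `max_seq_len`, rather than the model's configured `self.params.max_seq_len`:

```python
from typing import List


def generate_sketch(
    prompt_tokens: List[int],
    max_seq_len: int,
    echo: bool = False,
) -> List[int]:
    # Fake first sampled token (a real runner samples from model logits).
    tokens = list(prompt_tokens) + [prompt_tokens[-1] + 1]
    # The bound is now the caller-supplied cap, not self.params.max_seq_len.
    while len(tokens) < max_seq_len:
        tokens.append(tokens[-1] + 1)
    # echo=True keeps the prompt tokens in the returned sequence.
    return tokens if echo else tokens[len(prompt_tokens):]


# Total length (prompt + generated) is bounded by max_seq_len:
assert len(generate_sketch([1, 2, 3], max_seq_len=6, echo=True)) == 6
```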
```diff
@@ -135,6 +136,7 @@ def text_completion(
         """
         return self.generate(
             prompt_tokens=self.tokenizer.encode(prompt, bos=True, eos=False),
+            max_seq_len=self.params.max_seq_len,
             temperature=temperature,
             top_p=top_p,
             echo=echo,
@@ -169,6 +171,7 @@ def chat_completion(
             prompt_tokens=self.tokenizer.encode(
                 self._format_prompt(prompt), bos=True, eos=False
             ),
+            max_seq_len=self.params.max_seq_len,
             temperature=temperature,
             top_p=top_p,
             echo=True,
```
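Both call sites keep today's behavior by threading the model's configured limit through explicitly; the benefit of surfacing the parameter is that other callers can now cap a single call more tightly. Reusing `generate_sketch` from the sketch above (the constant and token ids are assumptions, not from this diff):

```python
PARAMS_MAX_SEQ_LEN = 128  # stands in for self.params.max_seq_len
toks = [1, 2, 3]          # stands in for tokenizer.encode(prompt, bos=True, eos=False)

# text_completion / chat_completion pass the configured limit through,
# preserving the old behavior:
full = generate_sketch(toks, max_seq_len=PARAMS_MAX_SEQ_LEN, echo=True)

# ...while a caller is now free to bound one call below the model config:
short = generate_sketch(toks, max_seq_len=8, echo=True)
assert len(short) == 8
```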