@@ -32,7 +32,7 @@ int main(int argc, char ** argv){
32
32
// tokenize the prompt
33
33
const bool add_bos = llama_should_add_bos_token (model);
34
34
35
- const char * static_input_file = " ./wikitext-2 -raw/wiki.train.raw" ;
35
+ const char * static_input_file = " ./wikitext-103 -raw/wiki.train.raw" ;
36
36
std::ifstream file (static_input_file);
37
37
if (!file) {
38
38
fprintf (stderr, " error: failed to open file '%s'\n " , static_input_file);
@@ -58,7 +58,6 @@ int main(int argc, char ** argv){
58
58
59
59
const int i_start = std::max (inp_size - nnew, ngram_size);
60
60
const int64_t t_start_ms = ggml_time_ms ();
61
- int percentage_done = 0 ;
62
61
for (int i = i_start; i < inp_size; ++i) {
63
62
const int ngram_start = i - ngram_size;
64
63
uint64_t ngram = inp_data[ngram_start];
@@ -83,21 +82,20 @@ int main(int argc, char ** argv){
83
82
}
84
83
}
85
84
86
- if (i >= inp_size*(percentage_done + 1 )/100 ) {
87
- ++percentage_done;
88
-
85
+ if (i % 10000000 == 0 ) {
89
86
const int64_t t_now_ms = ggml_time_ms ();
90
- const int64_t eta_ms = (100 - percentage_done ) * (t_now_ms - t_start_ms) / percentage_done ;
87
+ const int64_t eta_ms = (inp_size - i ) * (t_now_ms - t_start_ms) / i ;
91
88
const int64_t eta_min = eta_ms / (60 *1000 );
92
89
const int64_t eta_s = (eta_ms - eta_min) / 1000 ;
93
90
94
- fprintf (stderr, " lookup-create: %02d%% done, ETA: %02ld:%02ld\n " , percentage_done , eta_min, eta_s);
91
+ fprintf (stderr, " lookup-create: hashing %d/%d done, ETA: %02ld:%02ld\n " , i, inp_size , eta_min, eta_s);
95
92
}
96
93
}
97
94
};
98
95
99
96
all_token_hashmap atc;
100
97
update_hashmaps (&atc, inp_static.data (), inp_static.size (), inp_static.size ());
98
+ fprintf (stderr, " lookup-create: hashing done, writing file\n " );
101
99
102
100
std::ofstream file_out (" lookup.bin" , std::ios::binary);
103
101
for (std::pair<uint64_t , token_hashmap> item : atc) {
0 commit comments