Skip to content

Commit 0cf40f3

Browse files
fix rng seed, file path, create prints
1 parent 3c81e10 commit 0cf40f3

File tree

2 files changed

+6
-7
lines changed

2 files changed

+6
-7
lines changed

examples/lookup/lookup-create.cpp

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ int main(int argc, char ** argv){
3232
// tokenize the prompt
3333
const bool add_bos = llama_should_add_bos_token(model);
3434

35-
const char * static_input_file = "./wikitext-2-raw/wiki.train.raw";
35+
const char * static_input_file = "./wikitext-103-raw/wiki.train.raw";
3636
std::ifstream file(static_input_file);
3737
if (!file) {
3838
fprintf(stderr, "error: failed to open file '%s'\n", static_input_file);
@@ -58,7 +58,6 @@ int main(int argc, char ** argv){
5858

5959
const int i_start = std::max(inp_size - nnew, ngram_size);
6060
const int64_t t_start_ms = ggml_time_ms();
61-
int percentage_done = 0;
6261
for (int i = i_start; i < inp_size; ++i) {
6362
const int ngram_start = i - ngram_size;
6463
uint64_t ngram = inp_data[ngram_start];
@@ -83,21 +82,20 @@ int main(int argc, char ** argv){
8382
}
8483
}
8584

86-
if (i >= inp_size*(percentage_done + 1)/100) {
87-
++percentage_done;
88-
85+
if (i % 10000000 == 0) {
8986
const int64_t t_now_ms = ggml_time_ms();
90-
const int64_t eta_ms = (100 - percentage_done) * (t_now_ms - t_start_ms) / percentage_done;
87+
const int64_t eta_ms = (inp_size - i) * (t_now_ms - t_start_ms) / i;
9188
const int64_t eta_min = eta_ms / (60*1000);
9289
const int64_t eta_s = (eta_ms - eta_min) / 1000;
9390

94-
fprintf(stderr, "lookup-create: %02d%% done, ETA: %02ld:%02ld\n", percentage_done, eta_min, eta_s);
91+
fprintf(stderr, "lookup-create: hashing %d/%d done, ETA: %02ld:%02ld\n", i, inp_size, eta_min, eta_s);
9592
}
9693
}
9794
};
9895

9996
all_token_hashmap atc;
10097
update_hashmaps(&atc, inp_static.data(), inp_static.size(), inp_static.size());
98+
fprintf(stderr, "lookup-create: hashing done, writing file\n");
10199

102100
std::ofstream file_out("lookup.bin", std::ios::binary);
103101
for (std::pair<uint64_t, token_hashmap> item : atc) {

examples/lookup/lookup.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ int main(int argc, char ** argv){
5252

5353
// load the model
5454
std::tie(model, ctx) = llama_init_from_gpt_params(params);
55+
llama_set_rng_seed(ctx, params.seed);
5556
GGML_ASSERT(llama_n_vocab(model) < (1 << 16));
5657

5758
// tokenize the prompt

0 commit comments

Comments
 (0)