
Commit cda97bc

Revert "Merge branch 'ggerganov:master' into master"
This reverts commit b016f4c, reversing changes made to c0ad8bb.
1 parent b016f4c · commit cda97bc

File tree: 9 files changed, +2090 −3025 lines

bindings/javascript/whisper.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.

examples/talk-llama/llama.cpp

Lines changed: 843 additions & 907 deletions
Large diffs are not rendered by default.

examples/talk-llama/llama.h

Lines changed: 9 additions & 29 deletions
@@ -6,7 +6,7 @@
 #include <stdbool.h>
 
 #ifdef LLAMA_SHARED
-#    if defined(_WIN32) && !defined(__MINGW32__)
+#    ifdef _WIN32
 #        ifdef LLAMA_BUILD
 #            define LLAMA_API __declspec(dllexport)
 #        else
@@ -20,7 +20,7 @@
 #endif
 
 #define LLAMA_FILE_VERSION 1
-#define LLAMA_FILE_MAGIC 0x67676a74 // 'ggjt' in hex
+#define LLAMA_FILE_MAGIC 0x67676d66 // 'ggmf' in hex
 #define LLAMA_FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files
 
 #ifdef __cplusplus
@@ -45,7 +45,7 @@ extern "C" {
 
     } llama_token_data;
 
-    typedef void (*llama_progress_callback)(float progress, void *ctx);
+    typedef void (*llama_progress_callback)(double progress, void *ctx);
 
     struct llama_context_params {
         int n_ctx; // text context
@@ -55,7 +55,6 @@ extern "C" {
         bool f16_kv;     // use fp16 for KV cache
         bool logits_all; // the llama_eval() call computes all logits, not just the last one
         bool vocab_only; // only load the vocabulary, no weights
-        bool use_mmap;   // use mmap if possible
         bool use_mlock;  // force system to keep model in RAM
         bool embedding;  // embedding mode only
 
@@ -67,9 +66,6 @@ extern "C" {
 
     LLAMA_API struct llama_context_params llama_context_default_params();
 
-    LLAMA_API bool llama_mmap_supported();
-    LLAMA_API bool llama_mlock_supported();
-
    // Various functions for loading a ggml llama model.
    // Allocate (almost) all memory needed for the model.
    // Return NULL on failure
@@ -85,24 +81,8 @@ extern "C" {
     LLAMA_API int llama_model_quantize(
             const char * fname_inp,
             const char * fname_out,
-            int itype);
-
-    // Returns the KV cache that will contain the context for the
-    // ongoing prediction with the model.
-    LLAMA_API const uint8_t * llama_get_kv_cache(struct llama_context * ctx);
-
-    // Returns the size of the KV cache
-    LLAMA_API size_t llama_get_kv_cache_size(struct llama_context * ctx);
-
-    // Returns the number of tokens in the KV cache
-    LLAMA_API int llama_get_kv_cache_token_count(struct llama_context * ctx);
-
-    // Sets the KV cache containing the current context for the model
-    LLAMA_API void llama_set_kv_cache(
-            struct llama_context * ctx,
-            const uint8_t * kv_cache,
-            size_t n_size,
-            int n_token_count);
+            int itype,
+            int qk);
 
     // Run the llama inference to obtain the logits and probabilities for the next token.
     // tokens + n_tokens is the provided batch of new tokens to process
@@ -155,9 +135,9 @@ extern "C" {
             const llama_token * last_n_tokens_data,
             int last_n_tokens_size,
             int top_k,
-            float top_p,
-            float temp,
-            float repeat_penalty);
+            double top_p,
+            double temp,
+            double repeat_penalty);
 
     // Performance information
     LLAMA_API void llama_print_timings(struct llama_context * ctx);
@@ -170,4 +150,4 @@ extern "C" {
 }
 #endif
 
-#endif // LLAMA_H
+#endif
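The most visible effect of this revert in llama.h is the model file magic: LLAMA_FILE_MAGIC goes back from 0x67676a74 ('ggjt') to 0x67676d66 ('ggmf'). As a minimal sketch (not part of the commit, and print_magic is purely illustrative rather than a llama.cpp API), the following standalone C program decodes those 32-bit constants into the four-character ASCII tags quoted in the comments above:

// Illustrative only (not from the commit): decode the LLAMA_FILE_MAGIC
// constants touched by this revert into their ASCII tags.
#include <stdint.h>
#include <stdio.h>

static void print_magic(uint32_t magic) {
    char tag[5];
    for (int i = 0; i < 4; ++i) {
        // take bytes from most- to least-significant: 0x67, 0x67, 0x6d, 0x66 -> "ggmf"
        tag[i] = (char)((magic >> (8 * (3 - i))) & 0xFF);
    }
    tag[4] = '\0';
    printf("0x%08x -> '%s'\n", magic, tag);
}

int main(void) {
    print_magic(0x67676d66); // 'ggmf' - the value this revert restores
    print_magic(0x67676a74); // 'ggjt' - the value being reverted away from
    return 0;
}

Running it prints 0x67676d66 -> 'ggmf' and 0x67676a74 -> 'ggjt', matching the comments in the diff.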

examples/talk-llama/llama_internal.h

Lines changed: 0 additions & 12 deletions
This file was deleted.
