Skip to content

Commit 12112bf

Browse files
committed
Add basic cpu setup
1 parent fd5ea0f commit 12112bf

File tree

8 files changed

+423
-3
lines changed

8 files changed

+423
-3
lines changed

BRANCH_SETUP.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Setup this branch
2+
3+
## Create a LoRA adapter bin file
4+
5+
1. `mkdir models/open-llama` and download [Open-llama (all files)](https://huggingface.co/openlm-research/open_llama_3b_v2/tree/main) in the folder `./models/open-llama`
6+
7+
2. `mkdir data && touch data/hot-lora.txt` and write a couple of words in it.
8+
9+
3. Run:
10+
```bash
11+
# Convert base model to gguf
12+
python3 convert-hf-to-gguf.py models/open-llama/
13+
# Quantize base model
14+
./quantize ./models/open-llama/ggml-model-f16.gguf ./models/open-llama/ggml-model-q8_0.gguf Q8_0
15+
# Obtain Lora adapter
16+
./finetune --model-base models/open-llama/ggml-model-q8_0.gguf \
17+
--checkpoint-in models/open-llama/chk-lora-ggml-model-q8_0-hot-lora-LATEST.gguf \
18+
--checkpoint-out models/open-llama/chk-lora-ggml-model-q8_0-hot-lora-ITERATION.gguf \
19+
--lora-out models/open-llama/lora-ggml-model-q8_0-hot-lora-ITERATION.bin \
20+
--train-data "data/hot-lora.txt" \
21+
--save-every 1 \
22+
--threads 1 \
23+
--adam-iter 1 \
24+
--batch 1 \
25+
--ctx 16 \
26+
--use-checkpointing
27+
```
28+
29+
## Run main with adapter
30+
31+
Run main with base model and lora adapter to hot-swap
32+
```bash
33+
./main ./models/open-llama/ggml-model-f16.gguf \
34+
--hot-lora models/open-llama/lora-ggml-model-q8_0-hot-lora-ITERATION.bin \
35+
-ngl 0 \
36+
-n 128
37+
```
38+
39+
With `ngl > 0` the code breaks. Probably because the Lora tensors try to interact with the base tensors (`lora_mul_mat`), but they are not moved to the buffer of the base tensors.
40+
41+
# Logic
42+
43+
44+
45+
# Current status
46+
47+
- Only one LoRA adapter can be passed.
48+
- GPU not supported

common/common.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
789789
params.model = argv[i];
790790
return true;
791791
}
792+
if (arg == "-hl" || arg == "--hot-lora") {
793+
if (++i >= argc) {
794+
invalid_param = true;
795+
return true;
796+
}
797+
params.hot_lora = argv[i];
798+
return true;
799+
}
792800
if (arg == "-md" || arg == "--model-draft") {
793801
if (++i >= argc) {
794802
invalid_param = true;

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ struct gpt_params {
100100

101101
std::string model = ""; // model path
102102
std::string model_draft = ""; // draft model for speculative decoding
103+
std::string hot_lora = ""; // lora model path for hot swapping
103104
std::string model_alias = "unknown"; // model alias
104105
std::string model_url = ""; // model url to download
105106
std::string hf_repo = ""; // HF repo

data/hot-lora.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+
how are you?

ggml.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4313,6 +4313,52 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
43134313
return NULL;
43144314
}
43154315

4316+
//////// LORA
4317+
4318+
struct lora_tensor_pair* build_lora_weights_map(struct ggml_context* ctx) {
4319+
struct lora_tensor_pair* pair = malloc(sizeof(struct lora_tensor_pair));
4320+
if (!pair) return NULL;
4321+
pair->pairs = NULL;
4322+
pair->count = 0;
4323+
pair->capacity = 0;
4324+
4325+
struct ggml_object * obj = ctx->objects_begin;
4326+
char * const mem_buffer = ctx->mem_buffer;
4327+
4328+
while (obj != NULL) {
4329+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
4330+
struct ggml_tensor * tensor = (struct ggml_tensor *)(mem_buffer + obj->offs);
4331+
char * tensor_name = tensor->name;
4332+
4333+
if (strlen(tensor_name) > 6 && (strcmp(tensor_name + strlen(tensor_name) - 6, ".loraA") == 0 ||
4334+
strcmp(tensor_name + strlen(tensor_name) - 6, ".loraB") == 0)) {
4335+
if (pair->count == pair->capacity) {
4336+
pair->capacity = pair->capacity > 0 ? pair->capacity * 2 : 4;
4337+
pair->pairs = realloc(pair->pairs, pair->capacity * sizeof(struct lora_tensor_info));
4338+
}
4339+
4340+
pair->pairs[pair->count].name = strdup(tensor_name);
4341+
pair->pairs[pair->count].tensor = tensor;
4342+
pair->count++;
4343+
}
4344+
}
4345+
obj = obj->next;
4346+
}
4347+
4348+
return pair;
4349+
}
4350+
4351+
void free_lora_tensor_pair(struct lora_tensor_pair* pair) {
4352+
if (!pair) return;
4353+
for (int i = 0; i < pair->count; i++) {
4354+
free(pair->pairs[i].name);
4355+
}
4356+
free(pair->pairs);
4357+
free(pair);
4358+
}
4359+
4360+
//////// LORA
4361+
43164362
////////////////////////////////////////////////////////////////////////////////
43174363

43184364
// ggml_dup

ggml.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,25 @@ extern "C" {
835835
GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
836836
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
837837

838+
// One LoRA factor tensor found in a ggml context, keyed by name.
struct lora_tensor_info {
    char* name;                  // heap-duplicated tensor name; owned by this struct
    struct ggml_tensor* tensor;  // borrowed — storage owned by the ggml context
};

// Dynamically grown collection of LoRA tensors (names ending in
// ".loraA"/".loraB") discovered in a ggml context.
struct lora_tensor_pair {
    struct lora_tensor_info* pairs; // Dynamic array of tensor pairs
    int count;                      // number of valid entries in `pairs`
    int capacity;                   // allocated length of `pairs`
};

// Function to build tensor pairs: scans `ctx` for tensors whose names end
// in ".loraA" or ".loraB". Caller owns the result and must release it with
// free_lora_tensor_pair(). May return NULL on allocation failure.
struct lora_tensor_pair* build_lora_weights_map(struct ggml_context* ctx);

// Cleanup function for lora_tensor_pair: frees the duplicated names, the
// entry array, and the struct itself. Safe on NULL.
void free_lora_tensor_pair(struct lora_tensor_pair* pair);

854+
855+
856+
838857
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
839858
GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
840859
GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);

0 commit comments

Comments
 (0)