
Commit 9a9f3da

feat: add LoRA support
1 parent 536f3af commit 9a9f3da

File tree

7 files changed (+573, -36 lines)


README.md

Lines changed: 44 additions & 2 deletions

@@ -18,6 +18,8 @@ Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in
 - Original `txt2img` and `img2img` mode
 - Negative prompt
 - [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) style tokenizer (not all the features, only token weighting for now)
+- LoRA support, same as [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#lora)
+- Latent Consistency Models support(LCM/LCM-LoRA)
 - Sampling method
     - `Euler A`
     - `Euler`
@@ -42,7 +44,6 @@ Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in
 - [ ] Make inference faster
     - The current implementation of ggml_conv_2d is slow and has high memory usage
 - [ ] Continuing to reduce memory usage (quantizing the weights of ggml_conv_2d)
-- [ ] LoRA support
 - [ ] k-quants support

 ## Usage
@@ -125,6 +126,7 @@ arguments:
   -t, --threads N                    number of threads to use during computation (default: -1).
                                      If threads <= 0, then threads will be set to the number of CPU physical cores
   -m, --model [MODEL]                path to model
+  --lora-model-dir [DIR]             lora model directory
   -i, --init-img [IMAGE]             path to the input image, required by img2img
   -o, --output OUTPUT                path to write result image to (default: .\output.png)
   -p, --prompt [PROMPT]              the prompt to render
@@ -134,11 +136,12 @@ arguments:
                                      1.0 corresponds to full destruction of information in init image
   -H, --height H                     image height, in pixel space (default: 512)
   -W, --width W                      image width, in pixel space (default: 512)
-  --sampling-method {euler, euler_a, heun, dpm++2m, dpm++2mv2, lcm}
+  --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, lcm}
                                      sampling method (default: "euler_a")
   --steps STEPS                      number of sample steps (default: 20)
   --rng {std_default, cuda}          RNG (default: cuda)
   -s SEED, --seed SEED               RNG seed (default: 42, use random seed for < 0)
+  --schedule {discrete, karras}      Denoiser sigma schedule (default: discrete)
   -v, --verbose                      print extra info
 ```

@@ -167,6 +170,45 @@ Using formats of different precisions will yield results of varying quality.
 <img src="./assets/img2img_output.png" width="256x">
 </p>

+#### with LoRA
+
+- convert lora weights to ggml model format
+
+```shell
+cd models
+python convert.py [path to weights] --lora
+# For example, python convert.py marblesh.safetensors
+```
+
+- You can specify the directory where the lora weights are stored via `--lora-model-dir`. If not specified, the default is the current working directory.
+
+- LoRA is specified via prompt, just like [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#lora).
+
+Here's a simple example:
+
+```
+./bin/sd -m ../models/v1-5-pruned-emaonly-ggml-model-f16.bin -p "a lovely cat<lora:marblesh:1>" --lora-model-dir ../models
+```
+
+`../models/marblesh-ggml-lora.bin` will be applied to the model
+
+#### LCM/LCM-LoRA
+
+- Download LCM-LoRA form https://huggingface.co/latent-consistency/lcm-lora-sdv1-5
+- Specify LCM-LoRA by adding `<lora:lcm-lora-sdv1-5:1>` to prompt
+- It's advisable to set `--cfg-scale` to `1.0` instead of the default `7.0`. For `--steps`, a range of `2-8` steps is recommended. For `--sampling-method`, `lcm`/`euler_a` is recommended.
+
+Here's a simple example:
+
+```
+./bin/sd -m ../models/v1-5-pruned-emaonly-ggml-model-f16.bin -p "a lovely cat<lora:lcm-lora-sdv1-5:1>" --steps 4 --lora-model-dir ../models -v --cfg-scale 1
+```
+
+| without LCM-LoRA (--cfg-scale 7) | with LCM-LoRA (--cfg-scale 1) |
+| ---- |---- |
+| ![](./assets/without_lcm.png) |![](./assets/with_lcm.png) |
+
+
 ### Docker

 #### Building using Docker
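For illustration, here is a minimal sketch of how the `<lora:name:multiplier>` prompt tag documented in the README section above lines up with the file that `python convert.py <weights> --lora` writes. The helper below is hypothetical, not the parser stable-diffusion.cpp itself uses; only the `-ggml-lora.bin` naming and the `--lora-model-dir` lookup are taken from this commit.

```python
# Illustrative only: resolve a <lora:name:multiplier> tag to the converted file.
import os
import re

def lora_file_for_tag(prompt: str, lora_model_dir: str):
    """Return (path, multiplier) implied by the first <lora:...> tag, if any."""
    m = re.search(r"<lora:([^:>]+):([0-9.]+)>", prompt)
    if m is None:
        return None, 0.0
    name, multiplier = m.group(1), float(m.group(2))
    # convert.py --lora names its output "<basename>-ggml-lora.bin"
    return os.path.join(lora_model_dir, f"{name}-ggml-lora.bin"), multiplier

path, mult = lora_file_for_tag("a lovely cat<lora:marblesh:1>", "../models")
print(path, mult)  # ../models/marblesh-ggml-lora.bin 1.0
```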

assets/with_lcm.png

596 KB

assets/without_lcm.png

533 KB

examples/main.cpp

Lines changed: 11 additions & 2 deletions

@@ -95,6 +95,7 @@ struct Option {
     int n_threads = -1;
     std::string mode = TXT2IMG;
     std::string model_path;
+    std::string lora_model_dir;
     std::string output_path = "output.png";
     std::string init_img;
     std::string prompt;
@@ -115,6 +116,7 @@ struct Option {
         printf(" n_threads: %d\n", n_threads);
         printf(" mode: %s\n", mode.c_str());
         printf(" model_path: %s\n", model_path.c_str());
+        printf(" lora_model_dir: %s\n", lora_model_dir.c_str());
         printf(" output_path: %s\n", output_path.c_str());
         printf(" init_img: %s\n", init_img.c_str());
         printf(" prompt: %s\n", prompt.c_str());
@@ -127,7 +129,7 @@ struct Option {
         printf(" sample_steps: %d\n", sample_steps);
         printf(" strength: %.2f\n", strength);
         printf(" rng: %s\n", rng_type_to_str[rng_type]);
-        printf(" seed: %ld\n", seed);
+        printf(" seed: %lld\n", seed);
     }
 };

@@ -140,6 +142,7 @@ void print_usage(int argc, const char* argv[]) {
     printf("  -t, --threads N number of threads to use during computation (default: -1).\n");
     printf("  If threads <= 0, then threads will be set to the number of CPU physical cores\n");
     printf("  -m, --model [MODEL] path to model\n");
+    printf("  --lora-model-dir [DIR] lora model directory\n");
     printf("  -i, --init-img [IMAGE] path to the input image, required by img2img\n");
     printf("  -o, --output OUTPUT path to write result image to (default: .\\output.png)\n");
     printf("  -p, --prompt [PROMPT] the prompt to render\n");
@@ -183,6 +186,12 @@ void parse_args(int argc, const char* argv[], Option* opt) {
                 break;
             }
             opt->model_path = argv[i];
+        } else if (arg == "--lora-model-dir") {
+            if (++i >= argc) {
+                invalid_arg = true;
+                break;
+            }
+            opt->lora_model_dir = argv[i];
         } else if (arg == "-i" || arg == "--init-img") {
             if (++i >= argc) {
                 invalid_arg = true;
@@ -419,7 +428,7 @@ int main(int argc, const char* argv[]) {
         init_img.assign(img_data, img_data + (opt.w * opt.h * c));
     }

-    StableDiffusion sd(opt.n_threads, vae_decode_only, true, opt.rng_type);
+    StableDiffusion sd(opt.n_threads, vae_decode_only, true, opt.lora_model_dir, opt.rng_type);
     if (!sd.load_from_file(opt.model_path, opt.schedule)) {
         return 1;
     }

models/convert.py

Lines changed: 110 additions & 18 deletions

@@ -4,6 +4,7 @@

 import numpy as np
 import torch
+import re
 import safetensors.torch

 this_file_dir = os.path.dirname(__file__)
@@ -270,21 +271,107 @@ def preprocess(state_dict):
         new_state_dict[name] = w
     return new_state_dict

-def convert(model_path, out_type = None, out_file=None):
+re_digits = re.compile(r"\d+")
+re_x_proj = re.compile(r"(.*)_([qkv]_proj)$")
+re_compiled = {}
+
+suffix_conversion = {
+    "attentions": {},
+    "resnets": {
+        "conv1": "in_layers_2",
+        "conv2": "out_layers_3",
+        "norm1": "in_layers_0",
+        "norm2": "out_layers_0",
+        "time_emb_proj": "emb_layers_1",
+        "conv_shortcut": "skip_connection",
+    }
+}
+
+
+def convert_diffusers_name_to_compvis(key):
+    def match(match_list, regex_text):
+        regex = re_compiled.get(regex_text)
+        if regex is None:
+            regex = re.compile(regex_text)
+            re_compiled[regex_text] = regex
+
+        r = re.match(regex, key)
+        if not r:
+            return False
+
+        match_list.clear()
+        match_list.extend([int(x) if re.match(re_digits, x) else x for x in r.groups()])
+        return True
+
+    m = []
+
+    if match(m, r"lora_unet_conv_in(.*)"):
+        return f'model_diffusion_model_input_blocks_0_0{m[0]}'
+
+    if match(m, r"lora_unet_conv_out(.*)"):
+        return f'model_diffusion_model_out_2{m[0]}'
+
+    if match(m, r"lora_unet_time_embedding_linear_(\d+)(.*)"):
+        return f"model_diffusion_model_time_embed_{m[0] * 2 - 2}{m[1]}"
+
+    if match(m, r"lora_unet_down_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
+        return f"model_diffusion_model_input_blocks_{1 + m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
+
+    if match(m, r"lora_unet_mid_block_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[0], {}).get(m[2], m[2])
+        return f"model_diffusion_model_middle_block_{1 if m[0] == 'attentions' else m[1] * 2}_{suffix}"
+
+    if match(m, r"lora_unet_up_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
+        return f"model_diffusion_model_output_blocks_{m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
+
+    if match(m, r"lora_unet_down_blocks_(\d+)_downsamplers_0_conv"):
+        return f"model_diffusion_model_input_blocks_{3 + m[0] * 3}_0_op"
+
+    if match(m, r"lora_unet_up_blocks_(\d+)_upsamplers_0_conv"):
+        return f"model_diffusion_model_output_blocks_{2 + m[0] * 3}_{2 if m[0]>0 else 1}_conv"
+
+    if match(m, r"lora_te_text_model_encoder_layers_(\d+)_(.+)"):
+        return f"cond_stage_model_transformer_text_model_encoder_layers_{m[0]}_{m[1]}"
+
+    return None
+
+def preprocess_lora(state_dict):
+    new_state_dict = {}
+    for name, w in state_dict.items():
+        if not isinstance(w, torch.Tensor):
+            continue
+        name_without_network_parts, network_part = name.split(".", 1)
+        new_name_without_network_parts = convert_diffusers_name_to_compvis(name_without_network_parts)
+        if new_name_without_network_parts == None:
+            raise Exception(f"unknown lora tensor: {name}")
+        new_name = new_name_without_network_parts + "." + network_part
+        print(f"preprocess {name} => {new_name}")
+        new_state_dict[new_name] = w
+    return new_state_dict
+
+def convert(model_path, out_type = None, out_file=None, lora=False):
     # load model
-    with open(os.path.join(vocab_dir, "vocab.json"), encoding="utf-8") as f:
-        clip_vocab = json.load(f)
-
+    if not lora:
+        with open(os.path.join(vocab_dir, "vocab.json"), encoding="utf-8") as f:
+            clip_vocab = json.load(f)
+
     state_dict = load_model_from_file(model_path)
-    model_type = SD1
-    if "cond_stage_model.model.token_embedding.weight" in state_dict.keys():
+    model_type = SD1 # lora only for SD1 now
+    if not lora and "cond_stage_model.model.token_embedding.weight" in state_dict.keys():
         model_type = SD2
         print("Stable diffuison 2.x")
     else:
         print("Stable diffuison 1.x")
-    state_dict = preprocess(state_dict)
+    if lora:
+        state_dict = preprocess_lora(state_dict)
+    else:
+        state_dict = preprocess(state_dict)

     # output option
+    if lora:
+        out_type = "f16" # only f16 for now
     if out_type == None:
         weight = state_dict["model.diffusion_model.input_blocks.0.0.weight"].numpy()
         if weight.dtype == np.float32:
@@ -296,7 +383,10 @@ def convert(model_path, out_type = None, out_file=None):
         else:
             raise Exception("unsupported weight type %s" % weight.dtype)
     if out_file == None:
-        out_file = os.path.splitext(os.path.basename(model_path))[0] + f"-ggml-model-{out_type}.bin"
+        if lora:
+            out_file = os.path.splitext(os.path.basename(model_path))[0] + f"-ggml-lora.bin"
+        else:
+            out_file = os.path.splitext(os.path.basename(model_path))[0] + f"-ggml-model-{out_type}.bin"
     out_file = os.path.join(os.getcwd(), out_file)
     print(f"Saving GGML compatible file to {out_file}")

@@ -309,14 +399,15 @@ def convert(model_path, out_type = None, out_file=None):
     file.write(struct.pack("i", ftype))

     # vocab
-    byte_encoder = bytes_to_unicode()
-    byte_decoder = {v: k for k, v in byte_encoder.items()}
-    file.write(struct.pack("i", len(clip_vocab)))
-    for key in clip_vocab:
-        text = bytearray([byte_decoder[c] for c in key])
-        file.write(struct.pack("i", len(text)))
-        file.write(text)
-
+    if not lora:
+        byte_encoder = bytes_to_unicode()
+        byte_decoder = {v: k for k, v in byte_encoder.items()}
+        file.write(struct.pack("i", len(clip_vocab)))
+        for key in clip_vocab:
+            text = bytearray([byte_decoder[c] for c in key])
+            file.write(struct.pack("i", len(text)))
+            file.write(text)
+
     # weights
     for name in state_dict.keys():
         if not isinstance(state_dict[name], torch.Tensor):
@@ -337,7 +428,7 @@ def convert(model_path, out_type = None, out_file=None):
         old_type = data.dtype

         ttype = "f32"
-        if n_dims == 4:
+        if n_dims == 4 and not lora:
             data = data.astype(np.float16)
             ttype = "f16"
         elif n_dims == 2 and name[-7:] == ".weight":
@@ -380,6 +471,7 @@ def convert(model_path, out_type = None, out_file=None):
     parser = argparse.ArgumentParser(description="Convert Stable Diffuison model to GGML compatible file format")
     parser.add_argument("--out_type", choices=["f32", "f16", "q4_0", "q4_1", "q5_0", "q5_1", "q8_0"], help="output format (default: based on input)")
     parser.add_argument("--out_file", help="path to write to; default: based on input and current working directory")
+    parser.add_argument("--lora", action='store_true', default = False, help="convert lora weight; default: false")
     parser.add_argument("model_path", help="model file path (*.pth, *.pt, *.ckpt, *.safetensors)")
     args = parser.parse_args()
-    convert(args.model_path, args.out_type, args.out_file)
+    convert(args.model_path, args.out_type, args.out_file, args.lora)
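The diffusers-to-CompVis key mapping added above can be spot-checked by calling `convert_diffusers_name_to_compvis` directly. A minimal sketch, assuming `convert.py` is importable from the `models/` directory (i.e. its argparse section does not run on import); the expected outputs follow from the rules shown in the diff:

```python
# Sanity check for the diffusers -> compvis LoRA key mapping.
# Run from the models/ directory; assumes convert.py imports cleanly.
from convert import convert_diffusers_name_to_compvis

examples = [
    "lora_unet_conv_in",                             # UNet input convolution
    "lora_unet_down_blocks_0_attentions_1_proj_in",  # attention block in a down block
    "lora_te_text_model_encoder_layers_0_mlp_fc1",   # text-encoder layer
]
for key in examples:
    print(key, "=>", convert_diffusers_name_to_compvis(key))

# Expected output:
# lora_unet_conv_in => model_diffusion_model_input_blocks_0_0
# lora_unet_down_blocks_0_attentions_1_proj_in => model_diffusion_model_input_blocks_2_1_proj_in
# lora_te_text_model_encoder_layers_0_mlp_fc1 => cond_stage_model_transformer_text_model_encoder_layers_0_mlp_fc1
```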
