You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: BRANCH_SETUP.md
+253-1Lines changed: 253 additions & 1 deletion
Original file line number
Diff line number
Diff line change
@@ -36,6 +36,8 @@ Run main with base model and lora adapter to hot-swap
36
36
-n 128
37
37
```
38
38
39
+
This works, but the following error is logged: `ggml_metal_get_buffer: error: tensor 'blk.16.attn_v.weight.loraB' buffer is nil`
40
+
39
41
With `ngl > 0` the code breaks, probably because the LoRA tensors try to interact with the base tensors (as in `lora_mul_mat`), but the LoRA tensors are not moved to the GPU buffer that holds the base tensors.
40
42
41
43
# Logic
@@ -47,4 +49,254 @@ With `ngl > 0` the code breaks. Probably because the Lora tensors try to interac
47
49
48
50
- Only one LoRA adapter can be passed.
49
51
- The adapter is applied only to the Q, K, V matrices to keep the code contained (even though finetuning trained LoRA tensors for all linear layers)
50
-
- GPU not supported
52
+
- GPU not supported
53
+
54
+
55
+
56
+
57
+
# Tutorial
58
+
59
+
```cpp
60
+
#include "llama.h"
61
+
62
+
#include "unicode.h"
63
+
64
+
#include "ggml.h"
65
+
#include "ggml-alloc.h"
66
+
#include "ggml-backend.h"
67
+
68
+
#ifdef GGML_USE_RPC
69
+
# include "ggml-rpc.h"
70
+
#endif
71
+
72
+
#ifdef GGML_USE_CUDA
73
+
# include "ggml-cuda.h"
74
+
#elif defined(GGML_USE_VULKAN)
75
+
# include "ggml-vulkan.h"
76
+
#elif defined(GGML_USE_SYCL)
77
+
# include "ggml-sycl.h"
78
+
#elif defined(GGML_USE_KOMPUTE)
79
+
# include "ggml-kompute.h"
80
+
#endif
81
+
82
+
#ifdef GGML_USE_METAL
83
+
# include "ggml-metal.h"
84
+
#endif
85
+
86
+
// TODO: replace with ggml API call
87
+
#define QK_K 256
88
+
89
+
#ifdef __has_include
90
+
#if __has_include(<unistd.h>)
91
+
#include <unistd.h>
92
+
#if defined(_POSIX_MAPPED_FILES)
93
+
#include <sys/mman.h>
94
+
#include <fcntl.h>
95
+
#endif
96
+
#if defined(_POSIX_MEMLOCK_RANGE)
97
+
#include <sys/resource.h>
98
+
#endif
99
+
#endif
100
+
#endif
101
+
102
+
#if defined(_WIN32)
103
+
#define WIN32_LEAN_AND_MEAN
104
+
#ifndef NOMINMAX
105
+
#define NOMINMAX
106
+
#endif
107
+
#include <windows.h>
108
+
#ifndef PATH_MAX
109
+
#define PATH_MAX MAX_PATH
110
+
#endif
111
+
#include <io.h>
112
+
#endif
113
+
114
+
#include <algorithm>
115
+
#include <array>
116
+
#include <cassert>
117
+
#include <cctype>
118
+
#include <cfloat>
119
+
#include <cinttypes>
120
+
#include <climits>
121
+
#include <cmath>
122
+
#include <cstdarg>
123
+
#include <cstddef>
124
+
#include <cstdint>
125
+
#include <cstdio>
126
+
#include <cstring>
127
+
#include <ctime>
128
+
#include <forward_list>
129
+
#include <fstream>
130
+
#include <functional>
131
+
#include <future>
132
+
#include <initializer_list>
133
+
#include <locale>
134
+
#include <map>
135
+
#include <memory>
136
+
#include <mutex>
137
+
#include <numeric>
138
+
#include <queue>
139
+
#include <random>
140
+
#include <regex>
141
+
#include <set>
142
+
#include <sstream>
143
+
#include <thread>
144
+
#include <type_traits>
145
+
#include <unordered_map>
146
+
#include "ggml-metal.h"
147
+
148
+
#if defined(_MSC_VER)
149
+
#pragma warning(disable: 4244 4267) // possible loss of data
0 commit comments