@@ -4,13 +4,14 @@
   config,
   stdenv,
   mkShell,
+  runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
   python3,
   mpi,
-  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
+  blas,
   cudaPackages,
   darwin,
   rocmPackages,
@@ -23,7 +24,7 @@
     useOpenCL
     useRocm
     useVulkan
-  ],
+  ] && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
   useMpi ? false, # Increases the runtime closure size by ~700M
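
Note: `useBlas` now also requires `blas.meta.available`, so the BLAS backend only
defaults to on where nixpkgs can actually supply a BLAS implementation for the
platform. And because the derivation now takes the generic `blas` instead of
`openblas`, consumers can swap the implementation. A hypothetical override sketch,
assuming this file is instantiated via `callPackage` as `llama-cpp`:

    llama-cpp.override {
      # nixpkgs' generic BLAS wrapper; any conforming provider should work
      blas = pkgs.blas.override { blasProvider = pkgs.mkl; };
    }
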
@@ -35,7 +36,8 @@
   # It's necessary to consistently use backendStdenv when building with CUDA support,
   # otherwise we get libstdc++ errors downstream.
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
-  enableStatic ? effectiveStdenv.hostPlatform.isStatic
+  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
+  precompileMetalShaders ? false
 }@inputs:
 
 let
@@ -65,10 +67,15 @@
     strings.optionalString (suffices != [ ])
       ", accelerated with ${strings.concatStringsSep ", " suffices}";
 
+  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
+
   # TODO: package the Python in this repository in a Nix-like way.
   # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
   # is PEP 517-compatible, and ensure the correct .dist-info is generated.
   # https://peps.python.org/pep-0517/
+  #
+  # TODO: Package up each Python script or service appropriately, by making
+  # them into "entrypoints"
   llama-python = python3.withPackages (
     ps: [
       ps.numpy
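
Note: the `executableSuffix` binding added above resolves to the host platform's
executable suffix — the empty string on Linux and Darwin, `.exe` when
cross-compiling to Windows. It is used in `postInstall` further down so the `mv`
commands also work for Windows cross-builds (there the rename effectively becomes
`mv $out/bin/main.exe $out/bin/llama.exe`).
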
@@ -87,6 +94,11 @@
     ]
   );
 
+  xcrunHost = runCommand "xcrunHost" { } ''
+    mkdir -p $out/bin
+    ln -s /usr/bin/xcrun $out/bin
+  '';
+
   # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
   # separately
   darwinBuildInputs =
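
Note: `xcrunHost` is a deliberately impure shim: it symlinks the host's
`/usr/bin/xcrun` into the store so the sandbox-escaped build (see `__noChroot`
below) can locate Xcode's Metal toolchain, which lives at a variable path
outside `$PATH`. As a rough sketch of what shader precompilation then runs
(exact flags may differ from llama.cpp's actual build scripts):

    xcrun -sdk macosx metal    -O3 -c ggml-metal.metal -o ggml-metal.air
    xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
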
@@ -150,13 +162,18 @@ effectiveStdenv.mkDerivation (
     postPatch = ''
       substituteInPlace ./ggml-metal.m \
         --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-
-      # TODO: Package up each Python script or service appropriately.
-      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
-      # we could make those *.py into setuptools' entrypoints
-      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
+      substituteInPlace ./ggml-metal.m \
+        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
     '';
 
+    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+    # `default.metallib` may be compiled with the Metal compiler from Xcode,
+    # and we need to escape the sandbox on macOS to access the Metal compiler.
+    # `xcrun` is used to find the path of the Metal compiler, which is variable
+    # and not on $PATH;
+    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion.
+    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
+
     nativeBuildInputs =
       [
         cmake
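
Note: `__noChroot` is only honored when the Nix daemon allows relaxed
sandboxing; otherwise a build with `precompileMetalShaders = true` cannot reach
`/usr/bin/xcrun` and will fail. The assumed setting on the building machine:

    # nix.conf
    sandbox = relaxed
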
@@ -173,6 +190,8 @@ effectiveStdenv.mkDerivation (
       ]
       ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
         glibc.static
+      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
+        xcrunHost
       ];
 
     buildInputs =
@@ -181,6 +200,7 @@ effectiveStdenv.mkDerivation (
       ++ optionals useMpi [ mpi ]
       ++ optionals useOpenCL [ clblast ]
       ++ optionals useRocm rocmBuildInputs
+      ++ optionals useBlas [ blas ]
       ++ optionals useVulkan vulkanBuildInputs;
 
     cmakeFlags =
@@ -191,7 +211,7 @@ effectiveStdenv.mkDerivation (
         (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
         (cmakeBool "LLAMA_BLAS" useBlas)
         (cmakeBool "LLAMA_CLBLAST" useOpenCL)
-        (cmakeBool "LLAMA_CUBLAS" useCuda)
+        (cmakeBool "LLAMA_CUDA" useCuda)
         (cmakeBool "LLAMA_HIPBLAS" useRocm)
         (cmakeBool "LLAMA_METAL" useMetalKit)
         (cmakeBool "LLAMA_MPI" useMpi)
@@ -216,14 +236,16 @@ effectiveStdenv.mkDerivation (
         # Should likely use `rocmPackages.clr.gpuTargets`.
         "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
       ]
-      ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
-      ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
+      ++ optionals useMetalKit [
+        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+        (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+      ];
 
     # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
     # if they haven't been added yet.
     postInstall = ''
-      mv $out/bin/main $out/bin/llama
-      mv $out/bin/server $out/bin/llama-server
+      mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
+      mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
       mkdir -p $out/include
       cp $src/llama.h $out/include/
     '';
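
Note: with `precompileMetalShaders = false` (the default),
`LLAMA_METAL_EMBED_LIBRARY` embeds the Metal shader source into the binary
itself, so no `.metal`/`.metallib` file lookup is needed at run time; with
`precompileMetalShaders = true`, the build instead ships the `default.metallib`
that `postPatch` above points the loader at. A hypothetical consumer flip,
assuming the usual `callPackage` wiring:

    # requires sandbox = relaxed on macOS (see __noChroot above)
    llama-cpp.override { precompileMetalShaders = true; }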