File tree: 1 file changed, +8 −0 lines changed
Original file line number · Diff line number · Diff line change @@ -3020,6 +3020,14 @@ static void llm_load_tensors(
3020
3020
ggml_backend_type backend_norm;
3021
3021
ggml_backend_type backend_output;
3022
3022
3023
+ // Don't allow offloading more than 33 layers.
3024
+ // Offloading 34 layers causes the model to respond with the letter 'E'.
3025
+ // Offloading 35 layers doesn't work because of missing cuda implementation for rope:
3026
+ // GGML_ASSERT: ggml-cuda.cu:6402: ne00 == n_dims && "ne00 != n_dims is not implemented for CUDA yet"
3027
+ if (n_gpu_layers > 33 ) {
3028
+ n_gpu_layers = 33 ;
3029
+ }
3030
+
3023
3031
if (n_gpu_layers > int (n_layer)) {
3024
3032
// norm is not performance relevant on its own but keeping it in VRAM reduces data copying
3025
3033
// on Windows however this is detrimental unless everything is on the GPU
You can’t perform that action at this time.
0 commit comments