1 file changed, +5 −5 lines changed

@@ -3130,12 +3130,12 @@ static void llm_load_tensors(
             // norm is not performance relevant on its own but keeping it in VRAM reduces data copying
             // on Windows however this is detrimental unless everything is on the GPU
 #ifndef _WIN32
-            backend_norm = LLAMA_BACKEND_OFFLOAD;
+            backend_norm = llama_backend_offload;
 #else
-            backend_norm = n_gpu_layers <= (int) n_layer + 2 ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD;
+            backend_norm = n_gpu_layers <= (int) n_layer + 2 ? GGML_BACKEND_CPU : llama_backend_offload;
 #endif // _WIN32

-            backend_output = LLAMA_BACKEND_OFFLOAD_SPLIT;
+            backend_output = llama_backend_offload_split;
         } else {
             backend_norm   = GGML_BACKEND_CPU;
             backend_output = GGML_BACKEND_CPU;
@@ -3163,8 +3163,8 @@ static void llm_load_tensors(
             /*
             llama_model_loader: - tensor  4:         blk.0.attn_output.weight f16      [ 2560,  2560,     1,     1 ]
             */
-            const ggml_backend_type backend       = int(i) < i_gpu_start ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD; // NOLINT
-            const ggml_backend_type backend_split = int(i) < i_gpu_start ? GGML_BACKEND_CPU : LLAMA_BACKEND_OFFLOAD_SPLIT; // NOLINT
+            const ggml_backend_type backend       = int(i) < i_gpu_start ? GGML_BACKEND_CPU : llama_backend_offload; // NOLINT
+            const ggml_backend_type backend_split = int(i) < i_gpu_start ? GGML_BACKEND_CPU : llama_backend_offload_split; // NOLINT

             auto & layer = model.layers[i];
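The change is mechanical: every use of the compile-time macros LLAMA_BACKEND_OFFLOAD and LLAMA_BACKEND_OFFLOAD_SPLIT is replaced with the runtime variables llama_backend_offload and llama_backend_offload_split, so the offload target can be chosen while the program runs (for example, a CUDA-enabled build degrading to CPU when no usable GPU is present) instead of being fixed at build time. Below is a minimal compilable sketch of what that selection could look like; the enum values are assumed to mirror ggml's ggml_backend_type, and cuda_available() is a hypothetical stand-in for the real backend probe, not an actual llama.cpp/ggml function.

#include <cstdio>

// Values assumed to mirror ggml's ggml_backend_type enum (sketch only).
enum ggml_backend_type {
    GGML_BACKEND_CPU       = 0,
    GGML_BACKEND_GPU       = 10,
    GGML_BACKEND_GPU_SPLIT = 20,
};

// Hypothetical stand-in for the real runtime probe of whether the CUDA
// backend initialized successfully; not an actual llama.cpp/ggml call.
static bool cuda_available() { return false; }

int main() {
    // Default to CPU so the same binary still runs on GPU-less machines.
    ggml_backend_type llama_backend_offload       = GGML_BACKEND_CPU;
    ggml_backend_type llama_backend_offload_split = GGML_BACKEND_CPU;

    if (cuda_available()) {
        llama_backend_offload       = GGML_BACKEND_GPU;
        llama_backend_offload_split = GGML_BACKEND_GPU_SPLIT; // split tensors across GPUs
    }

    std::printf("backend_norm target: %d, backend_output target: %d\n",
                (int) llama_backend_offload, (int) llama_backend_offload_split);
    return 0;
}

The design point is that a macro bakes the decision into the binary, so a build compiled with GPU support would always try to offload; a variable lets the loader fall back gracefully at run time while leaving every call site in the diff unchanged except for the identifier.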