
Commit f5aef46 (1 parent: b228aba)

ggml-opencl, llama: using reserve() if count already known

2 files changed: +9 -1 lines
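The change leans on a standard property of std::vector: growing it element by element can reallocate its storage several times, while a single reserve() call sizes the storage once when the final count is already known. A minimal, self-contained sketch of that difference (illustration only, not code from the commit):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
        const size_t count = 1000;

        // growing without reserve(): a reallocation happens whenever capacity runs out
        std::vector<int> grown;
        size_t reallocations = 0;
        for (size_t i = 0; i < count; i++) {
            if (grown.size() == grown.capacity()) {
                reallocations++;             // the next push_back has to reallocate
            }
            grown.push_back((int) i);
        }

        // growing with reserve(): one allocation up front, none while pushing
        std::vector<int> reserved;
        reserved.reserve(count);
        for (size_t i = 0; i < count; i++) {
            reserved.push_back((int) i);
        }

        printf("without reserve: %zu reallocations, with reserve: 1 allocation\n", reallocations);
        return 0;
    }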


ggml-opencl.cpp

Lines changed: 5 additions & 1 deletion
@@ -1835,7 +1835,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                 CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
             }
 
-            for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
+            int64_t i12 = i02 * r2;
+            int64_t e12 = i12 + r2;
+            events.reserve(e12 - i12);
+            while (i12 < e12) {
                 if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
                     // copy src1 to device
                     events.emplace_back();
@@ -1885,6 +1888,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
 
                 ev_idx = 0;
                 events.clear();
+                i12++;
             }
         }
     }
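The ggml-opencl.cpp change hoists the loop bounds out of the for-header so the trip count (e12 - i12, i.e. r2) is known before the first emplace_back() and can be passed to events.reserve(); the for becomes a while, and the increment moves to the end of the body (second hunk). Because std::vector::clear() keeps the capacity, the one reserve() before the loop also covers later iterations even though events is cleared inside the loop. A simplified sketch of the resulting shape, with the OpenCL enqueue/wait calls stubbed out (illustration only):

    #include <cstdint>
    #include <vector>

    struct event_t {};   // stand-in for cl_event; the real code stores OpenCL events

    // simplified shape of the rewritten loop, not the actual kernel code
    void process_batch(std::vector<event_t> & events, int64_t i02, int64_t r2) {
        int64_t i12 = i02 * r2;
        int64_t e12 = i12 + r2;
        events.reserve(e12 - i12);     // capacity sized once for the known trip count
        while (i12 < e12) {
            events.emplace_back();     // stays within the reserved capacity
            // ... enqueue kernels and wait on the recorded events ...
            events.clear();            // keeps capacity, so the reserve() above still applies
            i12++;                     // increment moved from the for-header to the body end
        }
    }

    int main() {
        std::vector<event_t> events;
        process_batch(events, /*i02=*/0, /*r2=*/4);
        return 0;
    }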

llama.cpp

Lines changed: 4 additions & 0 deletions
@@ -6116,6 +6116,7 @@ static bool llm_load_tensors(
                 mlock_buf->init   (ggml_backend_buffer_get_base(buf));
                 mlock_buf->grow_to(ggml_backend_buffer_get_size(buf));
             }
+            bufs.reserve(ml.files.size());
             for (uint32_t idx = 0; idx < ml.files.size(); idx++) {
                 bufs.emplace(idx, buf);
             }
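The declaration of bufs is not shown in this diff; assuming it is a std::unordered_map keyed by the file index (std::map has no reserve()), reserve() sets the bucket count up front so the emplace() calls in the following loop do not trigger rehashing. A minimal sketch of that behaviour, with a hypothetical handle type in place of the backend buffer:

    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>

    int main() {
        const uint32_t n_files = 4;                  // hypothetical file count

        std::unordered_map<uint32_t, void *> bufs;   // void * stands in for a buffer handle
        bufs.reserve(n_files);                       // buckets sized for n_files keys
        for (uint32_t idx = 0; idx < n_files; idx++) {
            bufs.emplace(idx, nullptr);              // inserts without rehashing
        }

        printf("size: %zu, buckets: %zu\n", bufs.size(), bufs.bucket_count());
        return 0;
    }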
@@ -16062,6 +16063,7 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }
 
     // make tensors
+    cvec.tensors.reserve(model.hparams.n_layer);
     cvec.tensors.push_back(nullptr); // there's never a tensor for layer 0
     for (size_t il = 1; il < model.hparams.n_layer; il++) {
         struct ggml_context * ctx = ctx_map.at(model.buft_layer[il].buft);
@@ -16070,6 +16072,8 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }
 
     // allocate tensors / buffers and zero
+    cvec.ctxs.reserve(ctx_map.size());
+    cvec.bufs.reserve(ctx_map.size());
     for (auto it : ctx_map) {
         ggml_backend_buffer_type_t buft = it.first;
         ggml_context * ctx = it.second;
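In llama_control_vector_init both counts are known before the loops run: cvec.tensors ends up with model.hparams.n_layer entries (a nullptr for layer 0 plus one tensor per remaining layer), while cvec.ctxs and cvec.bufs receive at most one entry per buffer type in ctx_map, so ctx_map.size() is a safe upper bound. Reserving an upper bound is fine; the vector simply ends up with size() no larger than capacity(). A small sketch of that pattern with stand-in types (not the real ggml structs):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct tensor_stub  {};   // stand-in for ggml_tensor
    struct context_stub {};   // stand-in for ggml_context

    int main() {
        const uint32_t n_layer     = 32;   // assumed layer count (model.hparams.n_layer in llama.cpp)
        const size_t   n_buf_types = 2;    // assumed number of entries in ctx_map

        std::vector<tensor_stub *> tensors;
        tensors.reserve(n_layer);                 // exact count known up front
        tensors.push_back(nullptr);               // there's never a tensor for layer 0
        for (uint32_t il = 1; il < n_layer; il++) {
            tensors.push_back(new tensor_stub());
        }

        std::vector<context_stub *> ctxs;
        ctxs.reserve(n_buf_types);                // an upper bound works too
        for (size_t i = 0; i < n_buf_types; i++) {
            ctxs.push_back(new context_stub());
        }

        for (auto * t : tensors) delete t;
        for (auto * c : ctxs)    delete c;
        return 0;
    }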
