
Commit f5aef46 (1 parent: b228aba)

ggml-opencl, llama: using reserve() if count already known

2 files changed: +9 -1 lines
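The change leans on a standard property of std::vector: growing it element by element can reallocate its storage several times, while a single reserve() call sizes the storage once when the final count is already known. A minimal, self-contained sketch of that difference (illustration only, not code from the commit):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
        const size_t count = 1000;

        // growing without reserve(): a reallocation happens whenever capacity runs out
        std::vector<int> grown;
        size_t reallocations = 0;
        for (size_t i = 0; i < count; i++) {
            if (grown.size() == grown.capacity()) {
                reallocations++;             // the next push_back has to reallocate
            }
            grown.push_back((int) i);
        }

        // growing with reserve(): one allocation up front, none while pushing
        std::vector<int> reserved;
        reserved.reserve(count);
        for (size_t i = 0; i < count; i++) {
            reserved.push_back((int) i);
        }

        printf("without reserve: %zu reallocations, with reserve: 1 allocation\n", reallocations);
        return 0;
    }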


ggml-opencl.cpp

Lines changed: 5 additions & 1 deletion
@@ -1835,7 +1835,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                 CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
             }
 
-            for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
+            int64_t i12 = i02 * r2;
+            int64_t e12 = i12 + r2;
+            events.reserve(e12 - i12);
+            while (i12 < e12) {
                 if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
                     // copy src1 to device
                     events.emplace_back();
@@ -1885,6 +1888,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
 
                 ev_idx = 0;
                 events.clear();
+                i12++;
             }
         }
     }
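The ggml-opencl.cpp change hoists the loop bounds out of the for-header so the trip count (e12 - i12, i.e. r2) is known before the first emplace_back() and can be passed to events.reserve(); the for becomes a while, and the increment moves to the end of the body (second hunk). Because std::vector::clear() keeps the capacity, the one reserve() before the loop also covers later iterations even though events is cleared inside the loop. A simplified sketch of the resulting shape, with the OpenCL enqueue/wait calls stubbed out (illustration only):

    #include <cstdint>
    #include <vector>

    struct event_t {};   // stand-in for cl_event; the real code stores OpenCL events

    // simplified shape of the rewritten loop, not the actual kernel code
    void process_batch(std::vector<event_t> & events, int64_t i02, int64_t r2) {
        int64_t i12 = i02 * r2;
        int64_t e12 = i12 + r2;
        events.reserve(e12 - i12);     // capacity sized once for the known trip count
        while (i12 < e12) {
            events.emplace_back();     // stays within the reserved capacity
            // ... enqueue kernels and wait on the recorded events ...
            events.clear();            // keeps capacity, so the reserve() above still applies
            i12++;                     // increment moved from the for-header to the body end
        }
    }

    int main() {
        std::vector<event_t> events;
        process_batch(events, /*i02=*/0, /*r2=*/4);
        return 0;
    }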

llama.cpp

Lines changed: 4 additions & 0 deletions
@@ -6116,6 +6116,7 @@ static bool llm_load_tensors(
                 mlock_buf->init   (ggml_backend_buffer_get_base(buf));
                 mlock_buf->grow_to(ggml_backend_buffer_get_size(buf));
             }
+            bufs.reserve(ml.files.size());
             for (uint32_t idx = 0; idx < ml.files.size(); idx++) {
                 bufs.emplace(idx, buf);
             }
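The declaration of bufs is not shown in this diff; assuming it is a std::unordered_map keyed by the file index (std::map has no reserve()), reserve() sets the bucket count up front so the emplace() calls in the following loop do not trigger rehashing. A minimal sketch of that behaviour, with a hypothetical handle type in place of the backend buffer:

    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>

    int main() {
        const uint32_t n_files = 4;                  // hypothetical file count

        std::unordered_map<uint32_t, void *> bufs;   // void * stands in for a buffer handle
        bufs.reserve(n_files);                       // buckets sized for n_files keys
        for (uint32_t idx = 0; idx < n_files; idx++) {
            bufs.emplace(idx, nullptr);              // inserts without rehashing
        }

        printf("size: %zu, buckets: %zu\n", bufs.size(), bufs.bucket_count());
        return 0;
    }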
@@ -16062,6 +16063,7 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }
 
     // make tensors
+    cvec.tensors.reserve(model.hparams.n_layer);
     cvec.tensors.push_back(nullptr); // there's never a tensor for layer 0
     for (size_t il = 1; il < model.hparams.n_layer; il++) {
         struct ggml_context * ctx = ctx_map.at(model.buft_layer[il].buft);
@@ -16070,6 +16072,8 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }
 
     // allocate tensors / buffers and zero
+    cvec.ctxs.reserve(ctx_map.size());
+    cvec.bufs.reserve(ctx_map.size());
     for (auto it : ctx_map) {
         ggml_backend_buffer_type_t buft = it.first;
         ggml_context * ctx = it.second;
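In llama_control_vector_init both counts are known before the loops run: cvec.tensors ends up with model.hparams.n_layer entries (a nullptr for layer 0 plus one tensor per remaining layer), while cvec.ctxs and cvec.bufs receive at most one entry per buffer type in ctx_map, so ctx_map.size() is a safe upper bound. Reserving an upper bound is fine; the vector simply ends up with size() no larger than capacity(). A small sketch of that pattern with stand-in types (not the real ggml structs):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct tensor_stub  {};   // stand-in for ggml_tensor
    struct context_stub {};   // stand-in for ggml_context

    int main() {
        const uint32_t n_layer     = 32;   // assumed layer count (model.hparams.n_layer in llama.cpp)
        const size_t   n_buf_types = 2;    // assumed number of entries in ctx_map

        std::vector<tensor_stub *> tensors;
        tensors.reserve(n_layer);                 // exact count known up front
        tensors.push_back(nullptr);               // there's never a tensor for layer 0
        for (uint32_t il = 1; il < n_layer; il++) {
            tensors.push_back(new tensor_stub());
        }

        std::vector<context_stub *> ctxs;
        ctxs.reserve(n_buf_types);                // an upper bound works too
        for (size_t i = 0; i < n_buf_types; i++) {
            ctxs.push_back(new context_stub());
        }

        for (auto * t : tensors) delete t;
        for (auto * c : ctxs)    delete c;
        return 0;
    }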
