@@ -268,6 +268,10 @@ struct vk_subbuffer {
268
268
vk_buffer buffer;
269
269
uint64_t offset;
270
270
uint64_t size;
271
+
272
+ operator vk::DescriptorBufferInfo () const {
273
+ return { buffer->buffer , offset, size };
274
+ }
271
275
};
272
276
273
277
struct vk_semaphore {
@@ -1063,13 +1067,14 @@ static vk_subbuffer ggml_vk_subbuffer(vk_buffer& buf) {
1063
1067
1064
1068
// Record a full memory barrier into the context's command buffer so that all
// shader and transfer accesses issued before this point are visible to the
// ones issued after it. Stage masks come from the context's queue.
static void ggml_vk_sync_buffers(vk_context& ctx) {
    VK_LOG_DEBUG("ggml_vk_sync_buffers()");

    // Same read|write access mask on both the source and destination side of
    // the barrier: shader loads/stores and transfer (copy) reads/writes.
    const vk::AccessFlags rw_access =
        vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite |
        vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite;

    const std::vector<vk::MemoryBarrier> mem_barriers{ { rw_access, rw_access } };

    ctx->s->buffer.pipelineBarrier(
        ctx->q->stage_flags,
        ctx->q->stage_flags,
        {},
        mem_barriers,
        {},
        {}
    );
}
@@ -2420,28 +2425,23 @@ static vk_submission ggml_vk_begin_submission(vk_device& device, vk_queue& q, bo
2420
2425
return s;
2421
2426
}
2422
2427
2423
- static void ggml_vk_dispatch_pipeline (ggml_backend_vk_context * ctx, vk_context& subctx, vk_pipeline& pipeline, std::vector<vk_subbuffer>&& buffers, size_t push_constant_size, const void * push_constants, std::array<uint32_t , 3 > elements) {
2428
+
2429
+
2430
+ static void ggml_vk_dispatch_pipeline (ggml_backend_vk_context* ctx, vk_context& subctx, vk_pipeline& pipeline, std::initializer_list<vk::DescriptorBufferInfo> const & descriptor_buffer_infos, size_t push_constant_size, const void * push_constants, std::array<uint32_t , 3 > elements) {
2424
2431
const uint32_t wg0 = CEIL_DIV (elements[0 ], pipeline->wg_denoms [0 ]);
2425
2432
const uint32_t wg1 = CEIL_DIV (elements[1 ], pipeline->wg_denoms [1 ]);
2426
2433
const uint32_t wg2 = CEIL_DIV (elements[2 ], pipeline->wg_denoms [2 ]);
2427
2434
VK_LOG_DEBUG (" ggml_vk_dispatch_pipeline(" << pipeline->name << " , {" ;
2428
- for (auto & buffer : buffers ) {
2429
- std::cerr << " (" << buffer. buffer << " , " << buffer.offset << " , " << buffer.size << " ), " ;
2435
+ for (auto & buffer : descriptor_buffer_infos ) {
2436
+ std::cerr << " (" << buffer << " , " << buffer.offset << " , " << buffer.size << " ), " ;
2430
2437
}
2431
2438
std::cerr << " }, (" << wg0 << " ," << wg1 << " ," << wg2 << " ))" );
2432
- std::vector<vk::DescriptorBufferInfo> descriptor_buffer_infos;
2433
- std::vector<vk::WriteDescriptorSet> write_descriptor_sets;
2434
2439
GGML_ASSERT (pipeline->descriptor_set_idx < pipeline->descriptor_sets .size ());
2435
- GGML_ASSERT (buffers.size () == pipeline->parameter_count );
2436
- vk::DescriptorSet& descriptor_set = pipeline->descriptor_sets [pipeline->descriptor_set_idx ++];
2437
- for (uint32_t i = 0 ; i < pipeline->parameter_count ; i++) {
2438
- descriptor_buffer_infos.push_back ({buffers[i].buffer ->buffer , buffers[i].offset , buffers[i].size });
2439
- }
2440
- for (uint32_t i = 0 ; i < pipeline->parameter_count ; i++) {
2441
- write_descriptor_sets.push_back ({descriptor_set, i, 0 , 1 , vk::DescriptorType::eStorageBuffer, nullptr , &descriptor_buffer_infos[i]});
2442
- }
2440
+ GGML_ASSERT (descriptor_buffer_infos.size () == pipeline->parameter_count );
2443
2441
2444
- ctx->device ->device .updateDescriptorSets (write_descriptor_sets, {});
2442
+ vk::DescriptorSet& descriptor_set = pipeline->descriptor_sets [pipeline->descriptor_set_idx ++];
2443
+ vk::WriteDescriptorSet write_descriptor_set{ descriptor_set, 0 , 0 , pipeline->parameter_count , vk::DescriptorType::eStorageBuffer, nullptr , descriptor_buffer_infos.begin () };
2444
+ ctx->device ->device .updateDescriptorSets ({ write_descriptor_set }, {});
2445
2445
2446
2446
subctx->s ->buffer .pushConstants (pipeline->layout , vk::ShaderStageFlagBits::eCompute, 0 , push_constant_size, push_constants);
2447
2447
subctx->s ->buffer .bindPipeline (vk::PipelineBindPoint::eCompute, pipeline->pipeline );
@@ -3123,7 +3123,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub
3123
3123
} else if (qx_needs_dequant) {
3124
3124
const std::vector<uint32_t > pc = { (uint32_t )ne01, (uint32_t )ne10, (uint32_t )ne10, (uint32_t )ne10, (uint32_t )(ggml_nelements (src0)) };
3125
3125
ggml_vk_sync_buffers (subctx);
3126
- ggml_vk_dispatch_pipeline (ctx, subctx, to_fp16_vk_0, { { d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, { d_X, 0 , x_sz * ne02 * ne03 } }, pc.size () * sizeof (uint32_t ), pc.data (), { (uint32_t )(x_ne * ne02 * ne03), 1 , 1 });
3126
+ ggml_vk_dispatch_pipeline (ctx, subctx, to_fp16_vk_0, { vk_subbuffer { d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer { d_X, 0 , x_sz * ne02 * ne03 } }, pc.size () * sizeof (uint32_t ), pc.data (), { (uint32_t )(x_ne * ne02 * ne03), 1 , 1 });
3127
3127
}
3128
3128
if (y_non_contig) {
3129
3129
ggml_vk_cpy_to_contiguous (ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0 , VK_WHOLE_SIZE });
@@ -3312,7 +3312,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context&
3312
3312
};
3313
3313
ggml_vk_sync_buffers (subctx);
3314
3314
ggml_vk_dispatch_pipeline (ctx, subctx, dmmv,
3315
- { { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne22 * ne23} },
3315
+ { vk_subbuffer { d_X, x_buf_offset, x_sz * ne02 * ne03 }, vk_subbuffer { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer { d_D, d_buf_offset, d_sz * ne22 * ne23} },
3316
3316
sizeof (vk_mat_vec_push_constants), &pc, { groups_x, (uint32_t )(ne12 * ne13), groups_z });
3317
3317
}
3318
3318
@@ -3384,7 +3384,7 @@ static void ggml_vk_mul_mat_vec_p021_f16_f32(ggml_backend_vk_context * ctx, vk_c
3384
3384
// compute
3385
3385
const std::array<uint32_t , 6 > pc = { (uint32_t )ne00, (uint32_t )ne01, (uint32_t )ne02, (uint32_t )ne12, (uint32_t )(qy_shader_offset / ggml_type_size (src1->type )), (uint32_t )(d_shader_offset / ggml_type_size (dst->type )) };
3386
3386
ggml_vk_sync_buffers (subctx);
3387
- ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_mul_mat_vec_p021_f16_f32 , { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof (uint32_t ), &pc, { 1 , (uint32_t )ne01, (uint32_t )ne12 });
3387
+ ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_mul_mat_vec_p021_f16_f32 , { vk_subbuffer { d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 6 * sizeof (uint32_t ), &pc, { 1 , (uint32_t )ne01, (uint32_t )ne12 });
3388
3388
}
3389
3389
3390
3390
static void ggml_vk_mul_mat_vec_nc_f16_f32 (ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@@ -3459,7 +3459,8 @@ static void ggml_vk_mul_mat_vec_nc_f16_f32(ggml_backend_vk_context * ctx, vk_con
3459
3459
// compute
3460
3460
const std::array<uint32_t , 7 > pc = { (uint32_t )ne00, (uint32_t )ne01, row_stride_x, channel_stride_x, (uint32_t )(ne12 / ne02), (uint32_t )(qy_shader_offset / ggml_type_size (src1->type )), (uint32_t )(d_shader_offset / ggml_type_size (dst->type )) };
3461
3461
ggml_vk_sync_buffers (subctx);
3462
- ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_mul_mat_vec_nc_f16_f32 , { { d_Qx, qx_buf_offset, qx_sz }, { d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, { d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof (uint32_t ), &pc, { 1 , (uint32_t )ne01, (uint32_t )ne12 });
3462
+ ggml_vk_dispatch_pipeline (ctx, subctx, ctx->device ->pipeline_mul_mat_vec_nc_f16_f32 ,
3463
+ { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz }, vk_subbuffer{ d_Qy, qy_buffer_offset, qy_sz + qy_shader_offset }, vk_subbuffer{ d_D, d_buffer_offset, d_sz + d_shader_offset } }, 7 * sizeof (uint32_t ), &pc, { 1 , (uint32_t )ne01, (uint32_t )ne12 });
3463
3464
}
3464
3465
3465
3466
static void ggml_vk_mul_mat (ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@@ -3634,7 +3635,8 @@ static void ggml_vk_mul_mat_id_q_f16(ggml_backend_vk_context * ctx, vk_context&
3634
3635
} else if (qx_needs_dequant) {
3635
3636
const std::vector<uint32_t > pc = { (uint32_t )ne01, (uint32_t )ne10, (uint32_t )ne10, (uint32_t )ne10, (uint32_t )(ggml_nelements (src0)) };
3636
3637
ggml_vk_sync_buffers (subctx);
3637
- ggml_vk_dispatch_pipeline (ctx, subctx, to_fp16_vk_0, { { d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, { d_X, 0 , x_sz * ne02 * ne03 } }, pc.size () * sizeof (uint32_t ), pc.data (), { (uint32_t )(x_ne * ne02 * ne03), 1 , 1 });
3638
+ ggml_vk_dispatch_pipeline (ctx, subctx, to_fp16_vk_0,
3639
+ { vk_subbuffer{ d_Qx, qx_buf_offset, qx_sz * ne02 * ne03 }, vk_subbuffer{ d_X, 0 , x_sz * ne02 * ne03 } }, pc.size () * sizeof (uint32_t ), pc.data (), { (uint32_t )(x_ne * ne02 * ne03), 1 , 1 });
3638
3640
}
3639
3641
if (y_non_contig) {
3640
3642
ggml_vk_cpy_to_contiguous (ctx, subctx, to_fp16_vk_1, src1, { d_Qy, qy_buf_offset, VK_WHOLE_SIZE }, { d_Y, 0 , VK_WHOLE_SIZE });
@@ -3834,7 +3836,8 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte
3834
3836
};
3835
3837
ggml_vk_sync_buffers (subctx);
3836
3838
ggml_vk_dispatch_pipeline (ctx, subctx, dmmv,
3837
- { { d_X, x_buf_offset, x_sz * ne02 * ne03 }, { d_Y, y_buf_offset, y_sz * ne12 * ne13 }, { d_D, d_buf_offset, d_sz * ne22 * ne23}, { d_ids, ids_buf_offset, ids_sz } },
3839
+ { vk_subbuffer{ d_X, x_buf_offset, x_sz * ne02 * ne03 },
3840
+ vk_subbuffer{ d_Y, y_buf_offset, y_sz * ne12 * ne13 }, vk_subbuffer{ d_D, d_buf_offset, d_sz * ne22 * ne23}, vk_subbuffer{ d_ids, ids_buf_offset, ids_sz } },
3838
3841
sizeof (vk_mat_vec_id_push_constants), &pc, { groups_x, (uint32_t )nei0, groups_z });
3839
3842
}
3840
3843
@@ -4381,7 +4384,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
4381
4384
}
4382
4385
4383
4386
ggml_vk_sync_buffers (subctx);
4384
- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, subbuf_y, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4387
+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, subbuf_y, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4385
4388
} else if (op == GGML_OP_ROPE) {
4386
4389
// Empty src2 is possible in rope, but the shader needs a buffer
4387
4390
vk_subbuffer subbuf_z;
@@ -4392,20 +4395,20 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
4392
4395
}
4393
4396
4394
4397
ggml_vk_sync_buffers (subctx);
4395
- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, { d_Y, y_buf_offset, y_sz }, subbuf_z, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4398
+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, vk_subbuffer { d_Y, y_buf_offset, y_sz }, subbuf_z, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4396
4399
} else if (op == GGML_OP_IM2COL) {
4397
4400
// im2col uses only src1 and dst buffers
4398
4401
ggml_vk_sync_buffers (subctx);
4399
- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_Y, y_buf_offset, y_sz }, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4402
+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_Y, y_buf_offset, y_sz }, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4400
4403
} else if (use_src2) {
4401
4404
ggml_vk_sync_buffers (subctx);
4402
- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, { d_Y, y_buf_offset, y_sz }, { d_Z, z_buf_offset, z_sz }, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4405
+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, vk_subbuffer { d_Y, y_buf_offset, y_sz }, vk_subbuffer { d_Z, z_buf_offset, z_sz }, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4403
4406
} else if (use_src1) {
4404
4407
ggml_vk_sync_buffers (subctx);
4405
- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, { d_Y, y_buf_offset, y_sz }, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4408
+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, vk_subbuffer { d_Y, y_buf_offset, y_sz }, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4406
4409
} else {
4407
4410
ggml_vk_sync_buffers (subctx);
4408
- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset, x_sz }, { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4411
+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset, x_sz }, vk_subbuffer { d_D, d_buf_offset, d_sz } }, sizeof (PC), &pc, elements);
4409
4412
}
4410
4413
} else {
4411
4414
GGML_ASSERT (op != GGML_OP_SOFT_MAX);
@@ -4442,10 +4445,10 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
4442
4445
4443
4446
if (use_src1) {
4444
4447
ggml_vk_sync_buffers (subctx);
4445
- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_Y, y_buf_offset + y_offset, y_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof (PC), &pc, elements);
4448
+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset + x_offset, x_sz }, vk_subbuffer { d_Y, y_buf_offset + y_offset, y_sz }, vk_subbuffer { d_D, d_buf_offset + d_offset, d_sz } }, sizeof (PC), &pc, elements);
4446
4449
} else {
4447
4450
ggml_vk_sync_buffers (subctx);
4448
- ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { { d_X, x_buf_offset + x_offset, x_sz }, { d_D, d_buf_offset + d_offset, d_sz } }, sizeof (PC), &pc, elements);
4451
+ ggml_vk_dispatch_pipeline (ctx, subctx, pipeline, { vk_subbuffer { d_X, x_buf_offset + x_offset, x_sz }, vk_subbuffer { d_D, d_buf_offset + d_offset, d_sz } }, sizeof (PC), &pc, elements);
4449
4452
}
4450
4453
}
4451
4454
}
0 commit comments