@@ -5013,6 +5013,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
5013
5013
}
5014
5014
}
5015
5015
5016
+ ggml_pipeline_allocate_descriptor_sets (ctx->device );
5017
+
5016
5018
vk_buffer d_X = ggml_vk_create_buffer_check (ctx->device , sizeof (X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
5017
5019
vk_buffer d_Y = ggml_vk_create_buffer_check (ctx->device , sizeof (Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
5018
5020
vk_buffer d_D = ggml_vk_create_buffer_check (ctx->device , sizeof (float ) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
@@ -5129,7 +5131,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
5129
5131
5130
5132
avg_err /= m * n;
5131
5133
5132
- std::cerr << " TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << " ms avg_err=" << avg_err << std::endl;
5134
+ double tflops = 2.0 *m*n*k*batch*num_it / (time / 1000.0 ) / (1000.0 *1000.0 *1000.0 *1000.0 );
5135
+
5136
+ std::cerr << " TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << " ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
5133
5137
5134
5138
if (avg_err > 0.1 ) {
5135
5139
std::cerr << " m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
@@ -5251,12 +5255,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
5251
5255
5252
5256
ggml_pipeline_request_descriptor_sets (ctx->device , p, 1 );
5253
5257
5258
+ ggml_pipeline_allocate_descriptor_sets (ctx->device );
5259
+
5254
5260
ggml_vk_buffer_write (qx_buf, 0 , qx, qx_sz);
5255
5261
5256
5262
vk_context subctx = ggml_vk_create_context (ctx, ctx->device ->compute_queue );
5257
5263
ggml_vk_ctx_begin (ctx->device , subctx);
5258
5264
const std::vector<uint32_t > pc = { 1 , (uint32_t )ne, (uint32_t )ne, (uint32_t )ne, (uint32_t )ne };
5259
- ggml_vk_dispatch_pipeline (ctx, subctx, p, { { qx_buf, 0 , qx_sz }, { x_buf, 0 , x_sz_f16 } }, pc.size () * sizeof (int ), pc.data (), { (uint32_t )ne, 1 , 1 });
5265
+ ggml_vk_dispatch_pipeline (ctx, subctx, p, { vk_subbuffer { qx_buf, 0 , qx_sz }, vk_subbuffer { x_buf, 0 , x_sz_f16 } }, pc.size () * sizeof (int ), pc.data (), { (uint32_t )ne, 1 , 1 });
5260
5266
ggml_vk_ctx_end (subctx);
5261
5267
5262
5268
auto begin = std::chrono::high_resolution_clock::now ();
@@ -5383,6 +5389,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
5383
5389
}
5384
5390
}
5385
5391
5392
+ ggml_pipeline_allocate_descriptor_sets (ctx->device );
5393
+
5386
5394
ggml_vk_buffer_write (qx_buf, 0 , qx, qx_sz);
5387
5395
ggml_vk_buffer_write (y_buf, 0 , y, y_sz);
5388
5396
@@ -5450,7 +5458,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
5450
5458
5451
5459
avg_err /= m * n;
5452
5460
5453
- std::cerr << " TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << " ms avg_err=" << avg_err << std::endl;
5461
+ double tflops = 2.0 *m*n*k*batch*num_it / (time_ms / 1000.0 ) / (1000.0 *1000.0 *1000.0 *1000.0 );
5462
+
5463
+ std::cerr << " TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << " ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
5454
5464
5455
5465
if (avg_err > 0.01 || std::isnan (avg_err)) {
5456
5466
std::cerr << " m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
@@ -5502,9 +5512,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor)
5502
5512
5503
5513
static void ggml_vk_preallocate_buffers (ggml_backend_vk_context * ctx) {
5504
5514
#if defined(GGML_VULKAN_RUN_TESTS)
5505
- ctx->staging = ggml_vk_create_buffer_check (ctx->device , 100ul * 1024ul * 1024ul ,
5506
- vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
5507
- vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
5508
5515
ggml_vk_test_dequant (ctx, 7680 , GGML_TYPE_F32);
5509
5516
ggml_vk_test_dequant (ctx, 7680 , GGML_TYPE_Q4_0);
5510
5517
ggml_vk_test_dequant (ctx, 7680 , GGML_TYPE_Q4_1);
0 commit comments