Skip to content

Commit 0de8b20

Browse files
jeffbolznv authored and ggerganov committed
vulkan : fix build for GGML_VULKAN_RUN_TESTS, add TFLOPS to log (ggml/961)
1 parent 544f409 commit 0de8b20

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

ggml/src/ggml-vulkan.cpp

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5013,6 +5013,8 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
50135013
}
50145014
}
50155015

5016+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
5017+
50165018
vk_buffer d_X = ggml_vk_create_buffer_check(ctx->device, sizeof(X_TYPE) * x_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
50175019
vk_buffer d_Y = ggml_vk_create_buffer_check(ctx->device, sizeof(Y_TYPE) * y_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
50185020
vk_buffer d_D = ggml_vk_create_buffer_check(ctx->device, sizeof(float) * d_ne, vk::MemoryPropertyFlagBits::eDeviceLocal);
@@ -5129,7 +5131,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t
51295131

51305132
avg_err /= m * n;
51315133

5132-
std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms avg_err=" << avg_err << std::endl;
5134+
double tflops = 2.0*m*n*k*batch*num_it / (time / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
5135+
5136+
std::cerr << "TEST " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
51335137

51345138
if (avg_err > 0.1) {
51355139
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
@@ -5251,12 +5255,14 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_
52515255

52525256
ggml_pipeline_request_descriptor_sets(ctx->device, p, 1);
52535257

5258+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
5259+
52545260
ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
52555261

52565262
vk_context subctx = ggml_vk_create_context(ctx, ctx->device->compute_queue);
52575263
ggml_vk_ctx_begin(ctx->device, subctx);
52585264
const std::vector<uint32_t> pc = { 1, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne, (uint32_t)ne };
5259-
ggml_vk_dispatch_pipeline(ctx, subctx, p, { { qx_buf, 0, qx_sz }, { x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
5265+
ggml_vk_dispatch_pipeline(ctx, subctx, p, { vk_subbuffer{ qx_buf, 0, qx_sz }, vk_subbuffer{ x_buf, 0, x_sz_f16 } }, pc.size() * sizeof(int), pc.data(), { (uint32_t)ne, 1, 1});
52605266
ggml_vk_ctx_end(subctx);
52615267

52625268
auto begin = std::chrono::high_resolution_clock::now();
@@ -5383,6 +5389,8 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
53835389
}
53845390
}
53855391

5392+
ggml_pipeline_allocate_descriptor_sets(ctx->device);
5393+
53865394
ggml_vk_buffer_write(qx_buf, 0, qx, qx_sz);
53875395
ggml_vk_buffer_write(y_buf, 0, y, y_sz);
53885396

@@ -5450,7 +5458,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * ctx, size_t m,
54505458

54515459
avg_err /= m * n;
54525460

5453-
std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms avg_err=" << avg_err << std::endl;
5461+
double tflops = 2.0*m*n*k*batch*num_it / (time_ms / 1000.0) / (1000.0*1000.0*1000.0*1000.0);
5462+
5463+
std::cerr << "TEST MMQ " << shname << " m=" << m << " n=" << n << " k=" << k << " batch=" << batch << " split_k=" << split_k << " matmul " << time_ms / num_it << "ms " << tflops << " TFLOPS avg_err=" << avg_err << std::endl;
54545464

54555465
if (avg_err > 0.01 || std::isnan(avg_err)) {
54565466
std::cerr << "m = " << first_err_m << " n = " << first_err_n << " b = " << first_err_b << std::endl;
@@ -5502,9 +5512,6 @@ static ggml_tensor_extra_gpu * ggml_vk_tensor_create_extra(ggml_tensor * tensor)
55025512

55035513
static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
55045514
#if defined(GGML_VULKAN_RUN_TESTS)
5505-
ctx->staging = ggml_vk_create_buffer_check(ctx->device, 100ul * 1024ul * 1024ul,
5506-
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached,
5507-
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent);
55085515
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_F32);
55095516
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_0);
55105517
ggml_vk_test_dequant(ctx, 7680, GGML_TYPE_Q4_1);

0 commit comments

Comments
 (0)