vulkan: readd GGML_VULKAN_PERF #13761

Closed · wants to merge 2 commits
19 changes: 17 additions & 2 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -839,7 +839,7 @@ class vk_perf_logger {
for (const auto& time : t.second) {
total += time;
}
-std::cerr << t.first << ": " << t.second.size() << " x " << (total / t.second.size() / 1000.0) << " ms" << std::endl;
+std::cerr << t.first << ": " << t.second.size() << " x " << (total / t.second.size() / 1000.0) << " us" << std::endl;
}

timings.clear();
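
The change above only corrects the reported unit: the samples fed into the logger are nanoseconds (see the duration_cast to std::chrono::nanoseconds further down), so dividing the per-op total by the sample count and then by 1000.0 yields microseconds, not milliseconds. A standalone sketch of that averaging, illustrative only and not the real vk_perf_logger:

    // Stand-in for the vk_perf_logger averaging: samples are nanoseconds,
    // so total / count / 1000.0 is an average in microseconds.
    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
        std::map<std::string, std::vector<uint64_t>> timings; // op name -> ns samples
        timings["MUL_MAT"] = {120000, 130000, 125000};        // made-up sample data

        for (const auto& t : timings) {
            uint64_t total = 0;
            for (const auto& time : t.second) {
                total += time;
            }
            std::cerr << t.first << ": " << t.second.size() << " x "
                      << (total / t.second.size() / 1000.0) << " us" << std::endl;
        }
        return 0;
    }
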
@@ -9004,6 +9004,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *

vk_context subctx = ctx->tensor_ctxs[tensor_idx].lock();

+#ifdef GGML_VULKAN_PERF
+std::chrono::steady_clock::time_point start;
+#endif

// always wait for the GPU work to be done for the last submit
if (tensor_idx == subctx->exit_tensor_idx) {
use_fence = true;
@@ -9013,6 +9017,8 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
if (!subctx->seqs.empty()) {
#ifdef GGML_VULKAN_CHECK_RESULTS
ggml_vk_check_results_0(tensor);
+#endif
+#if defined(GGML_VULKAN_CHECK_RESULTS) || defined(GGML_VULKAN_PERF)
use_fence = true;
#endif
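
The widened guard above exists because a CPU-side timer is only meaningful if the CPU actually waits for the GPU: with asynchronous submission the measurement would cover little more than the submit call. A minimal illustration of the guard, with a hypothetical stand-in variable rather than the real ggml-vulkan code:

    // With either debug define set, force a fence wait so that result checks
    // and wall-clock timings refer to finished GPU work.
    #include <iostream>

    int main() {
        bool use_fence = false;
    #if defined(GGML_VULKAN_CHECK_RESULTS) || defined(GGML_VULKAN_PERF)
        use_fence = true;
    #endif
        std::cout << "use_fence = " << std::boolalpha << use_fence << std::endl;
        return 0;
    }
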

@@ -9021,6 +9027,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
memcpy(cpy.dst, cpy.src, cpy.n);
}

+#ifdef GGML_VULKAN_PERF
+start = std::chrono::steady_clock::now();
+#endif

if (almost_ready && !ctx->almost_ready_fence_pending && !use_fence) {
ggml_vk_submit(subctx, ctx->almost_ready_fence);
ctx->almost_ready_fence_pending = true;
@@ -9037,6 +9047,11 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
}

if (tensor_idx == subctx->exit_tensor_idx) {
+#ifdef GGML_VULKAN_PERF
+auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now() - start);
+ctx->device->perf_logger->log_timing(tensor, duration.count());
+#endif

// Do staging buffer copies
for (auto& cpy : subctx->out_memcpys) {
memcpy(cpy.dst, cpy.src, cpy.n);
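
Taken together, the additions above re-create the GGML_VULKAN_PERF flow: take a std::chrono::steady_clock timestamp just before the submit, wait on the fence, then log the elapsed nanoseconds for the exit tensor of the submission. A condensed sketch of that flow with simplified stand-in names (submit_and_wait and log_timing here are illustrative, not the ggml-vulkan API):

    #include <chrono>
    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-ins for vkQueueSubmit + vkWaitForFences and the logger.
    static void submit_and_wait() { /* GPU work happens here */ }
    static void log_timing(const char* op, uint64_t ns) {
        std::cerr << op << ": " << ns / 1000.0 << " us" << std::endl;
    }

    int main() {
        auto start = std::chrono::steady_clock::now();    // before submit
        submit_and_wait();                                // fence forces completion
        auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now() - start);
        log_timing("MUL_MAT", duration.count());          // elapsed ns, printed as us
        return 0;
    }
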
@@ -9534,7 +9549,7 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
if (enqueued) {
++submitted_nodes;

-#ifndef GGML_VULKAN_CHECK_RESULTS
+#if !defined(GGML_VULKAN_CHECK_RESULTS) && !defined(GGML_VULKAN_PERF)
if (first_node_in_batch) {
first_node_in_batch = false;
}
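
This last hunk swaps #ifndef for an equivalent !defined(...) check that also covers GGML_VULKAN_PERF, so node batching stays enabled in normal builds and is skipped when either debug define is set; as with GGML_VULKAN_CHECK_RESULTS, that keeps submissions per node so the per-node timings stay meaningful. The whole feature is compile-time. A small illustration of how such a path compiles away without the define (built with e.g. -DGGML_VULKAN_PERF on the compiler command line; how the real build system exposes the option is not shown in this diff):

    #include <chrono>
    #include <iostream>

    int main() {
    #ifdef GGML_VULKAN_PERF
        auto start = std::chrono::steady_clock::now();
    #endif

        volatile long work = 0;                       // stand-in for submitted GPU work
        for (int i = 0; i < 1000000; ++i) { work += i; }

    #ifdef GGML_VULKAN_PERF
        auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                      std::chrono::steady_clock::now() - start).count();
        std::cerr << "work: " << ns / 1000.0 << " us" << std::endl;
    #else
        std::cerr << "perf logging compiled out" << std::endl;
    #endif
        return 0;
    }
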