Skip to content

Commit ec9cdb6

Browse files
committed
ggml : do not print perf ops that have not been used at all
1 parent e4422e2 commit ec9cdb6

File tree

1 file changed

+5 -1 lines changed

1 file changed

+5 -1 lines changed

ggml.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11237,7 +11237,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
11237 11237       for (int i = 0; i < cgraph->n_nodes; i++) {
11238 11238           struct ggml_tensor * node = cgraph->nodes[i];
11239 11239
11240       -         perf_total_per_op_us[node->op] += node->perf_time_us;
      11240 +         perf_total_per_op_us[node->op] += MAX(1, node->perf_time_us);
11241 11241
11242 11242           GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
11243 11243                   i,
@@ -11260,6 +11260,10 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
11260 11260       }
11261 11261
11262 11262       for (int i = 0; i < GGML_OP_COUNT; i++) {
      11263 +         if (perf_total_per_op_us[i] == 0) {
      11264 +             continue;
      11265 +         }
      11266 +
11263 11267           GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_LABEL[i], (double) perf_total_per_op_us[i] / 1000.0);
11264 11268       }
11265 11269

0 commit comments

Comments
 (0)