Skip to content

Commit e4422e2

Browse files
committed
ggml : better PERF prints + support "LLAMA_PERF=1 make"
1 parent 53c8434 commit e4422e2

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ ifdef LLAMA_GPROF
117 117   CFLAGS += -pg
118 118   CXXFLAGS += -pg
119 119   endif
    120 + ifdef LLAMA_PERF
    121 + CFLAGS += -DGGML_PERF
    122 + CXXFLAGS += -DGGML_PERF
    123 + endif
120 124   ifneq ($(filter aarch64%,$(UNAME_M)),)
121 125   CFLAGS += -mcpu=native
122 126   CXXFLAGS += -mcpu=native

ggml.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11239,7 +11239,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
11239 11239
11240 11240   perf_total_per_op_us[node->op] += node->perf_time_us;
11241 11241
11242       - GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 ", %" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
      11242 + GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
11243 11243   i,
11244 11244   node->ne[0], node->ne[1], node->ne[2],
11245 11245   GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
@@ -11253,7 +11253,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
11253 11253   for (int i = 0; i < cgraph->n_leafs; i++) {
11254 11254   struct ggml_tensor * node = cgraph->leafs[i];
11255 11255
11256       - GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 "] %8s\n",
      11256 + GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
11257 11257   i,
11258 11258   node->ne[0], node->ne[1],
11259 11259   GGML_OP_LABEL[node->op]);

llama.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1250,9 +1250,11 @@ static bool llama_eval_internal(
1250 1250   ggml_build_forward_expand(&gf, inpL);
1251 1251   ggml_graph_compute (ctx0, &gf);
1252 1252
     1253 + #ifdef GGML_PERF
1253 1254   // print timing information per ggml operation (for debugging purposes)
1254 1255   // requires GGML_PERF to be defined
1255      - //ggml_graph_print(&gf);
     1256 + ggml_graph_print(&gf);
     1257 + #endif
1256 1258
1257 1259   // plot the computation graph in dot format (for debugging purposes)
1258 1260   //if (n_past%100 == 0) {

0 commit comments

Comments
 (0)