
Commit 8ee192c

Author: mike dupont
enable logging of cuda in app
1 parent a869386 commit 8ee192c

File tree

3 files changed: 12 additions & 9 deletions


CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ endif()
 
 # general
 option(LLAMA_STATIC "llama: static link libraries" OFF)
-option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
+option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
 option(LLAMA_LTO "llama: enable link time optimization" OFF)
 
 # debug

examples/main/main.cpp

Lines changed: 3 additions & 0 deletions
@@ -850,6 +850,9 @@ int main(int argc, char ** argv) {
     llama_print_timings(ctx);
     write_logfile(ctx, params, model, input_tokens, output_ss.str(), output_tokens);
 
+    // dump core
+    int *ptr = 0; *ptr = 1;
+
     if (ctx_guidance) { llama_free(ctx_guidance); }
     llama_free(ctx);
     llama_free_model(model);
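
Note: the added null-pointer write is an intentional crash; with core dumps enabled (ulimit -c unlimited), the resulting SIGSEGV leaves a core file right after the logfile is written. A minimal sketch of a better-defined way to force the same core dump at this point (an aside, not what the commit does):

    // Hypothetical stand-in for `int *ptr = 0; *ptr = 1;`: abort() raises
    // SIGABRT, whose default disposition also produces a core file
    // (assuming `ulimit -c unlimited`), without invoking undefined behavior.
    #include <cstdlib>

    int main() {
        // ... inference, llama_print_timings(ctx), write_logfile(...) ...
        std::abort();  // terminate here and dump core
    }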

ggml-cuda.cu

Lines changed: 8 additions & 8 deletions
@@ -7623,12 +7623,12 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
 #endif
 
     // debug helpers
-    //printf("src0: %8d %8d %8d %8d\n", src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]);
-    //printf("      %8d %8d %8d %8d\n", src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);
-    //printf("src1: %8d %8d %8d %8d\n", src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3]);
-    //printf("      %8d %8d %8d %8d\n", src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3]);
-    //printf("src0 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name);
-    //printf("src1 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
+    printf("src0: %8d %8d %8d %8d\n", src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]);
+    printf("      %8d %8d %8d %8d\n", src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);
+    printf("src1: %8d %8d %8d %8d\n", src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3]);
+    printf("      %8d %8d %8d %8d\n", src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3]);
+    printf("src0 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name);
+    printf("src1 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
 
     if (!split && all_on_device && !use_tensor_cores && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
         // KQ single-batch
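
A caveat on the printfs enabled above: in ggml, ne[] is int64_t and nb[] is size_t, so the %8d specifiers are mismatched and will trigger -Wformat warnings (and can print garbled values on LP64 targets). A sketch with matching specifiers (an aside, not part of the commit):

    // Sketch: format specifiers matching the ggml field types.
    #include <cinttypes>  // PRId64
    printf("src0: %8" PRId64 " %8" PRId64 " %8" PRId64 " %8" PRId64 "\n",
           src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]);
    printf("      %8zu %8zu %8zu %8zu\n",
           src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);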
@@ -8056,9 +8056,9 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
 
     if (tensor->op == GGML_OP_MUL_MAT) {
         if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
-#ifndef NDEBUG
+
             fprintf(stderr, "%s: cannot compute %s: src0->ne[3] = %d, src1->ne[3] = %d - fallback to CPU\n", __func__, tensor->name, tensor->src[0]->ne[3], tensor->src[1]->ne[3]);
-#endif
+
             return false;
         }
     }
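
Removing the #ifndef NDEBUG guard makes this CPU-fallback warning print in release builds too, so mul-mat fallbacks show up in normal runs. Note that ne[3] is also int64_t, so a warning-clean version of the message would use PRId64 (a sketch, not part of the commit):

    // Sketch: the same warning with specifiers matching int64_t.
    #include <cinttypes>
    fprintf(stderr, "%s: cannot compute %s: src0->ne[3] = %" PRId64 ", src1->ne[3] = %" PRId64 " - fallback to CPU\n",
            __func__, tensor->name, tensor->src[0]->ne[3], tensor->src[1]->ne[3]);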
