@@ -7623,12 +7623,12 @@ static void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1
7623
7623
#endif
7624
7624
7625
7625
// debug helpers
7626
- // printf("src0: %8d %8d %8d %8d\n", src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]);
7627
- // printf(" %8d %8d %8d %8d\n", src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);
7628
- // printf("src1: %8d %8d %8d %8d\n", src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3]);
7629
- // printf(" %8d %8d %8d %8d\n", src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3]);
7630
- // printf("src0 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name);
7631
- // printf("src1 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name);
7626
+ printf (" src0: %8d %8d %8d %8d\n " , src0->ne [0 ], src0->ne [1 ], src0->ne [2 ], src0->ne [3 ]);
7627
+ printf (" %8d %8d %8d %8d\n " , src0->nb [0 ], src0->nb [1 ], src0->nb [2 ], src0->nb [3 ]);
7628
+ printf (" src1: %8d %8d %8d %8d\n " , src1->ne [0 ], src1->ne [1 ], src1->ne [2 ], src1->ne [3 ]);
7629
+ printf (" %8d %8d %8d %8d\n " , src1->nb [0 ], src1->nb [1 ], src1->nb [2 ], src1->nb [3 ]);
7630
+ printf (" src0 is contiguous %d, transposed %d, type = %s, name = %s\n " , ggml_is_contiguous (src0), ggml_is_transposed (src0), ggml_type_name (src0->type ), src0->name );
7631
+ printf (" src1 is contiguous %d, transposed %d, type = %s, name = %s\n " , ggml_is_contiguous (src1), ggml_is_transposed (src1), ggml_type_name (src1->type ), src1->name );
7632
7632
7633
7633
if (!split && all_on_device && !use_tensor_cores && src0->type == GGML_TYPE_F16 && ggml_is_permuted (src0) && ggml_is_permuted (src1) && src1->ne [1 ] == 1 ) {
7634
7634
// KQ single-batch
@@ -8056,9 +8056,9 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
8056
8056
8057
8057
if (tensor->op == GGML_OP_MUL_MAT) {
8058
8058
if (tensor->src [0 ]->ne [3 ] != tensor->src [1 ]->ne [3 ]) {
8059
- # ifndef NDEBUG
8059
+
8060
8060
fprintf (stderr, " %s: cannot compute %s: src0->ne[3] = %d, src1->ne[3] = %d - fallback to CPU\n " , __func__, tensor->name , tensor->src [0 ]->ne [3 ], tensor->src [1 ]->ne [3 ]);
8061
- # endif
8061
+
8062
8062
return false ;
8063
8063
}
8064
8064
}
0 commit comments