@@ -14374,67 +14374,64 @@ void ggml_graph_compute_mul_mat_set_task_profile(struct ggml_cgraph *cgraph) {
     int N = (int)node->ne[0];
     int K = (int)node->src1->ne[0];
 
+    bool continuous = ggml_is_contiguous(node->src0) && ggml_is_contiguous(node->src1);
+
     struct ggml_task_profile *profiles;
     int n_profiles = ggml_mulmat_get_task_profiles(&profiles, node->src0->type, node->src1->type);
     GGML_ASSERT(n_profiles >= 2);
 
-    const struct ggml_task_profile *profile = &profiles[0]; // [0] is always the cpu only profile.
-
-    if (ggml_is_contiguous(node->src0) && ggml_is_contiguous(node->src1)) {
-        if (cgraph->mm_tune == NULL) {
-            // NOTE: assume all M are same within this comput egraph.
-            if (M >= 32 && N >= 32 && K >= 32) {
-                // NOTE: copied from llama.cpp to here. But I assue that CUDA
-                // or CL run in 1 OS-thread as well.
-                if (ggml_cpu_has_blas()) {
-                    //if (!ggml_cpu_has_gpublas()) {
-                    cgraph->n_threads = 1;
-                    GGML_PRINT_THREAD_DEBUG(">>>> n_threads was set to 1");
-                    //}
-                    profile = &profiles[1]; // [1] is always the 1 thread gpu profile.
-                }
-            }
-        } else {
-            int slot = ggml_mulmat_tune_cache_hash(M, N, K) % mm_cache_len;
-            struct mm_cache_element *e = &mm_cache[slot];
+    const struct ggml_task_profile *profile = NULL;
 
-            if (e->M == M && e->N == N && e->K == K) {
-                profile = e->profile;
-            } else {
-                struct ggml_mulmat_tune_time_stats t_stats;
-                size_t sz = sizeof(struct ggml_mulmat_tune_profile_time) * n_profiles;
-                t_stats.profile_time = malloc(sz);
-                GGML_ASSERT(t_stats.profile_time);
-                memset(t_stats.profile_time, 0, sz);
-
-                ggml_mulmat_tune_estimate_time(
-                    cgraph->mm_tune, M, N, K, cgraph->n_threads, &t_stats);
-                int min = INT32_MAX;
-                profile = NULL;
-
-                int i_profile;
-                for (i_profile = 0; i_profile < t_stats.n_profiles; i_profile++) {
-                    int total = t_stats.profile_time[i_profile].total_time;
-                    if (total < min) {
-                        min = total;
-                        profile = &cgraph->mm_tune->profiles[i_profile];
-                    }
-                }
+    if (cgraph->mm_tune != NULL && continuous) {
+        int slot = ggml_mulmat_tune_cache_hash(M, N, K) % mm_cache_len;
+        struct mm_cache_element *e = &mm_cache[slot];
+
+        if (e->M == M && e->N == N && e->K == K) {
+            profile = e->profile;
+        } else {
+            struct ggml_mulmat_tune_time_stats t_stats;
+            size_t sz = sizeof(struct ggml_mulmat_tune_profile_time) * n_profiles;
+            t_stats.profile_time = malloc(sz);
+            GGML_ASSERT(t_stats.profile_time);
+            memset(t_stats.profile_time, 0, sz);
+
+            int rc = ggml_mulmat_tune_estimate_time(
+                cgraph->mm_tune, M, N, K, cgraph->n_threads, &t_stats);
+            int min = INT32_MAX;
+            profile = NULL;
+
+            if (rc == 0) {
+                int i_profile;
+                for (i_profile = 0; i_profile < t_stats.n_profiles; i_profile++) {
+                    int total = t_stats.profile_time[i_profile].total_time;
+                    if (total < min) {
+                        min = total;
+                        profile = &cgraph->mm_tune->profiles[i_profile];
+                    }
+                }
+            }
 
-                e->M = M;
-                e->N = N;
-                e->K = K;
-                e->profile = profile;
+            e->M = M;
+            e->N = N;
+            e->K = K;
+            e->profile = profile;
 
-                GGML_PRINT_THREAD_DEBUG("(1) M: %d, N: %d, K: %d, backends: %2d, %2d %2d\n",
-                    M, N, K,
-                    profile->stages[0].backend,
-                    profile->stages[1].backend,
-                    profile->stages[2].backend);
+            GGML_PRINT_THREAD_DEBUG("(1) M: %d, N: %d, K: %d, backends: %2d, %2d %2d\n",
+                M, N, K,
+                profile->stages[0].backend,
+                profile->stages[1].backend,
+                profile->stages[2].backend);
-            }
         }
     }
 
+    if (profile == NULL) {
+        profile = &profiles[0]; // [0] is always the cpu only profile.
+        //if (continuous && M >= 32 && N >= 4096 && K >= 4096) {
+        if (continuous && M >= 32 && N >= 32 && K >= 32) {
+            profile = &profiles[1]; // [1] is always the 1 thread gpu profile.
+        }
+    }
+
     memcpy(&node->task_profile, profile, sizeof(struct ggml_task_profile));
 
     GGML_PRINT_THREAD_DEBUG("(2) M: %d, N: %d, K: %d, backends: %2d, %2d %2d\n",
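On a cache miss, the new code estimates per-profile execution time and takes the argmin, and only when no tuned choice exists does it fall back to the fixed defaults: `profiles[0]`, the CPU-only profile, promoted to `profiles[1]`, the single-thread GPU profile, when the operands are contiguous and M, N, K are all at least 32. A hedged sketch of that argmin shape, with a hypothetical `profile_time` type standing in for `ggml_mulmat_tune_profile_time`:

```c
#include <limits.h>

// Illustrative stand-in for ggml_mulmat_tune_profile_time.
struct profile_time { int total_time; };

// Return the index of the profile with the smallest estimated total time,
// or -1 when there are no estimates. Returning -1 mirrors `profile == NULL`
// in the hunk above, which makes the caller apply the size-threshold fallback.
static int pick_min_time_profile(const struct profile_time *times, int n) {
    int best = -1;
    int min = INT_MAX;
    for (int i = 0; i < n; i++) {
        if (times[i].total_time < min) {
            min = times[i].total_time;
            best = i;
        }
    }
    return best;
}
```

Restructuring from "defaults first, tune as override" to "tune first, defaults last" is what lets the hunk collapse the old nested `if (cgraph->mm_tune == NULL)` chain into one flat `if (profile == NULL)` fallback.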