Skip to content

Commit 0231713

Browse files
CUDA/HIP: fix tests/test-backend-ops
1 parent 1e6f655 commit 0231713

File tree

2 files changed

+21
-14
lines changed

2 files changed

+21
-14
lines changed

ggml/src/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2877,7 +2877,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
             return true;
         case GGML_OP_FLASH_ATTN_EXT:
 #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-            return op->src[0]->ne[0] == 64 || op->src[0]->ne[0] == 128;
+            return (op->src[0]->ne[0] == 64 && op->src[1]->type == GGML_TYPE_F16) || op->src[0]->ne[0] == 128;
 #else
             if (op->src[0]->ne[0] == 128) {
                 return true;

tests/test-backend-ops.cpp

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ struct test_case {
         return false;
     }

-    bool eval_perf(ggml_backend_t backend, const char * op_name) {
+    bool eval_perf(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_name) {
         mode = MODE_PERF;

        static const size_t graph_nodes = 8192;
@@ -562,9 +562,16 @@ struct test_case {
        int len = printf("  %s(%s): ", op_desc(out).c_str(), vars().c_str());
        fflush(stdout);

-        // check if backends support op
-        if (!ggml_backend_supports_op(backend, out)) {
-            printf("not supported\n");
+        // check if the backends support the op
+        bool supported = true;
+        for (ggml_backend_t backend : {backend1, backend2}) {
+            if (!ggml_backend_supports_op(backend, out)) {
+                printf("not supported [%s] ", ggml_backend_name(backend));
+                supported = false;
+            }
+        }
+        if (!supported) {
+            printf("\n");
            ggml_free(ctx);
            return true;
        }
@@ -579,7 +586,7 @@ struct test_case {
        printf("%*s", last - len, "");

        // allocate
-        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
        if (buf == NULL) {
            printf("failed to allocate tensors\n");
            ggml_free(ctx);
@@ -594,10 +601,10 @@ struct test_case {
        ggml_build_forward_expand(gf, out);

        // warmup run
-        ggml_backend_graph_compute(backend, gf);
+        ggml_backend_graph_compute(backend1, gf);

        // duplicate the op
-        size_t target_size = ggml_backend_is_cpu(backend) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
+        size_t target_size = ggml_backend_is_cpu(backend1) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
        int n_runs = std::min((size_t)gf->size - gf->n_nodes, target_size / op_size(out)) + 1;
        for (int i = 1; i < n_runs; i++) {
            gf->nodes[gf->n_nodes++] = out;
@@ -623,11 +630,11 @@ struct test_case {
        }

        // run
-        ggml_backend_synchronize(backend);
+        ggml_backend_synchronize(backend1);

        int64_t start_time = ggml_time_us();
-        ggml_backend_graph_compute(backend, gf);
-        ggml_backend_synchronize(backend);
+        ggml_backend_graph_compute(backend1, gf);
+        ggml_backend_synchronize(backend1);
        int64_t end_time = ggml_time_us();
        double time_us = end_time - start_time;

@@ -2445,10 +2452,10 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
    test_cases.emplace_back(new test_falcon(2));
 #endif

+    ggml_backend_t backend_cpu = ggml_backend_cpu_init();
+
    // run tests
    if (mode == MODE_TEST) {
-        ggml_backend_t backend_cpu = ggml_backend_cpu_init();
-
        size_t n_ok = 0;
        for (auto & test : test_cases) {
            if (test->eval(backend, backend_cpu, op_name)) {
@@ -2464,7 +2471,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op

    if (mode == MODE_PERF) {
        for (auto & test : test_cases) {
-            test->eval_perf(backend, op_name);
+            test->eval_perf(backend, backend_cpu, op_name);
        }
        return true;
    }

0 commit comments

Comments
 (0)