@@ -539,7 +539,7 @@ struct test_case {
539
539
return false ;
540
540
}
541
541
542
- bool eval_perf (ggml_backend_t backend , const char * op_name) {
542
+ bool eval_perf (ggml_backend_t backend1, ggml_backend_t backend2 , const char * op_name) {
543
543
mode = MODE_PERF;
544
544
545
545
static const size_t graph_nodes = 8192 ;
@@ -562,9 +562,16 @@ struct test_case {
562
562
int len = printf (" %s(%s): " , op_desc (out).c_str (), vars ().c_str ());
563
563
fflush (stdout);
564
564
565
- // check if backends support op
566
- if (!ggml_backend_supports_op (backend, out)) {
567
- printf (" not supported\n " );
565
+ // check if the backends support the op
566
+ bool supported = true ;
567
+ for (ggml_backend_t backend : {backend1, backend2}) {
568
+ if (!ggml_backend_supports_op (backend, out)) {
569
+ printf (" not supported [%s] " , ggml_backend_name (backend));
570
+ supported = false ;
571
+ }
572
+ }
573
+ if (!supported) {
574
+ printf (" \n " );
568
575
ggml_free (ctx);
569
576
return true ;
570
577
}
@@ -579,7 +586,7 @@ struct test_case {
579
586
printf (" %*s" , last - len, " " );
580
587
581
588
// allocate
582
- ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors (ctx, backend );
589
+ ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors (ctx, backend1 );
583
590
if (buf == NULL ) {
584
591
printf (" failed to allocate tensors\n " );
585
592
ggml_free (ctx);
@@ -594,10 +601,10 @@ struct test_case {
594
601
ggml_build_forward_expand (gf, out);
595
602
596
603
// warmup run
597
- ggml_backend_graph_compute (backend , gf);
604
+ ggml_backend_graph_compute (backend1 , gf);
598
605
599
606
// duplicate the op
600
- size_t target_size = ggml_backend_is_cpu (backend ) ? 1ULL << 33 : 1ULL << 35 ; // 8 GB CPU, 32 GB GPU
607
+ size_t target_size = ggml_backend_is_cpu (backend1 ) ? 1ULL << 33 : 1ULL << 35 ; // 8 GB CPU, 32 GB GPU
601
608
int n_runs = std::min ((size_t )gf->size - gf->n_nodes , target_size / op_size (out)) + 1 ;
602
609
for (int i = 1 ; i < n_runs; i++) {
603
610
gf->nodes [gf->n_nodes ++] = out;
@@ -623,11 +630,11 @@ struct test_case {
623
630
}
624
631
625
632
// run
626
- ggml_backend_synchronize (backend );
633
+ ggml_backend_synchronize (backend1 );
627
634
628
635
int64_t start_time = ggml_time_us ();
629
- ggml_backend_graph_compute (backend , gf);
630
- ggml_backend_synchronize (backend );
636
+ ggml_backend_graph_compute (backend1 , gf);
637
+ ggml_backend_synchronize (backend1 );
631
638
int64_t end_time = ggml_time_us ();
632
639
double time_us = end_time - start_time;
633
640
@@ -2445,10 +2452,10 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
2445
2452
test_cases.emplace_back(new test_falcon(2));
2446
2453
#endif
2447
2454
2455
+ ggml_backend_t backend_cpu = ggml_backend_cpu_init ();
2456
+
2448
2457
// run tests
2449
2458
if (mode == MODE_TEST) {
2450
- ggml_backend_t backend_cpu = ggml_backend_cpu_init ();
2451
-
2452
2459
size_t n_ok = 0 ;
2453
2460
for (auto & test : test_cases) {
2454
2461
if (test->eval (backend, backend_cpu, op_name)) {
@@ -2464,7 +2471,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
2464
2471
2465
2472
if (mode == MODE_PERF) {
2466
2473
for (auto & test : test_cases) {
2467
- test->eval_perf (backend, op_name);
2474
+ test->eval_perf (backend, backend_cpu, op_name);
2468
2475
}
2469
2476
return true ;
2470
2477
}
0 commit comments