Skip to content

Commit 0231713

Browse files
CUDA/HIP: fix tests/test-backend-ops
1 parent 1e6f655 commit 0231713

File tree

2 files changed

+21
-14
lines changed

2 files changed

+21
-14
lines changed

ggml/src/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2877,7 +2877,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
             return true;
         case GGML_OP_FLASH_ATTN_EXT:
 #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-            return op->src[0]->ne[0] == 64 || op->src[0]->ne[0] == 128;
+            return (op->src[0]->ne[0] == 64 && op->src[1]->type == GGML_TYPE_F16) || op->src[0]->ne[0] == 128;
 #else
             if (op->src[0]->ne[0] == 128) {
                 return true;

tests/test-backend-ops.cpp

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ struct test_case {
         return false;
     }

-    bool eval_perf(ggml_backend_t backend, const char * op_name) {
+    bool eval_perf(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_name) {
         mode = MODE_PERF;

        static const size_t graph_nodes = 8192;
@@ -562,9 +562,16 @@ struct test_case {
        int len = printf("  %s(%s): ", op_desc(out).c_str(), vars().c_str());
        fflush(stdout);

-        // check if backends support op
-        if (!ggml_backend_supports_op(backend, out)) {
-            printf("not supported\n");
+        // check if the backends support the op
+        bool supported = true;
+        for (ggml_backend_t backend : {backend1, backend2}) {
+            if (!ggml_backend_supports_op(backend, out)) {
+                printf("not supported [%s] ", ggml_backend_name(backend));
+                supported = false;
+            }
+        }
+        if (!supported) {
+            printf("\n");
            ggml_free(ctx);
            return true;
        }
@@ -579,7 +586,7 @@ struct test_case {
        printf("%*s", last - len, "");

        // allocate
-        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);
+        ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
        if (buf == NULL) {
            printf("failed to allocate tensors\n");
            ggml_free(ctx);
@@ -594,10 +601,10 @@ struct test_case {
        ggml_build_forward_expand(gf, out);

        // warmup run
-        ggml_backend_graph_compute(backend, gf);
+        ggml_backend_graph_compute(backend1, gf);

        // duplicate the op
-        size_t target_size = ggml_backend_is_cpu(backend) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
+        size_t target_size = ggml_backend_is_cpu(backend1) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
        int n_runs = std::min((size_t)gf->size - gf->n_nodes, target_size / op_size(out)) + 1;
        for (int i = 1; i < n_runs; i++) {
            gf->nodes[gf->n_nodes++] = out;
@@ -623,11 +630,11 @@ struct test_case {
        }

        // run
-        ggml_backend_synchronize(backend);
+        ggml_backend_synchronize(backend1);

        int64_t start_time = ggml_time_us();
-        ggml_backend_graph_compute(backend, gf);
-        ggml_backend_synchronize(backend);
+        ggml_backend_graph_compute(backend1, gf);
+        ggml_backend_synchronize(backend1);
        int64_t end_time = ggml_time_us();
        double time_us = end_time - start_time;

@@ -2445,10 +2452,10 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
    test_cases.emplace_back(new test_falcon(2));
 #endif

+    ggml_backend_t backend_cpu = ggml_backend_cpu_init();
+
    // run tests
    if (mode == MODE_TEST) {
-        ggml_backend_t backend_cpu = ggml_backend_cpu_init();
-
        size_t n_ok = 0;
        for (auto & test : test_cases) {
            if (test->eval(backend, backend_cpu, op_name)) {
@@ -2464,7 +2471,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op

    if (mode == MODE_PERF) {
        for (auto & test : test_cases) {
-            test->eval_perf(backend, op_name);
+            test->eval_perf(backend, backend_cpu, op_name);
        }
        return true;
    }

0 commit comments

Comments
 (0)