Skip to content

Commit 1e0f5ad

Browse files
committed
ggml : restrict op on other backends to equal head sizes
ggml-ci
1 parent ac33a92 commit 1e0f5ad

File tree

2 files changed

+11
-0
lines changed

2 files changed

+11
-0
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3232,6 +3232,13 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
3232 3232
#ifndef FLASH_ATTN_AVAILABLE
3233 3233
return false;
3234 3234
#endif // FLASH_ATTN_AVAILABLE
3235+
if (op->src[1]->ne[0] != op->src[2]->ne[0]) {
3236+
// different head sizes of K and V are not supported yet
3237+
return false;
3238+
}
3239+
if (op->src[0]->ne[0] == 192) {
3240+
return false;
3241+
}
3235 3242
if (op->src[0]->ne[3] != 1) {
3236 3243
return false;
3237 3244
}

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8764,6 +8764,10 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
8764 8764
default:
8765 8765
return false;
8766 8766
}
8767+
if (op->src[1]->ne[0] != op->src[2]->ne[0]) {
8768+
// different head sizes of K and V are not supported yet
8769+
return false;
8770+
}
8767 8771
if (op->src[0]->type != GGML_TYPE_F32) {
8768 8772
return false;
8769 8773
}

0 commit comments

Comments
 (0)