
Commit 68614ce

Apply ggerganov's fixes for test-backend-ops
1 parent 88b97b8

4 files changed (+10 -5 lines)

ggml-metal.m

Lines changed: 1 addition & 1 deletion
@@ -782,7 +782,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_GET_ROWS:
             {
-                return op->ne[3] == 1;
+                return op->src[0]->type != GGML_TYPE_BF16 && op->ne[3] == 1;
             }
         default:
             return false;

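With this change the Metal backend reports GGML_OP_DIAG_MASK_INF and GGML_OP_GET_ROWS as unsupported whenever src[0] is BF16, so callers skip or fall back instead of hitting a missing kernel. A minimal sketch of how a harness can honor that answer, assuming the public ggml_backend_supports_op() from ggml-backend.h (test-backend-ops follows the same skip-if-unsupported pattern):

    /* Sketch only: gate evaluation on the backend's supports_op answer. */
    #include <stdio.h>
    #include "ggml.h"
    #include "ggml-backend.h"

    static void eval_or_skip(ggml_backend_t backend, struct ggml_tensor * op) {
        if (!ggml_backend_supports_op(backend, op)) {
            /* e.g. GGML_OP_GET_ROWS with a BF16 src[0] on Metal lands here after this commit */
            printf("  %s: not supported by backend, skipping\n", ggml_op_name(op->op));
            return;
        }
        /* ... otherwise build a graph around `op` and compute it on this backend ... */
    }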
ggml.c

Lines changed: 4 additions & 1 deletion
@@ -19426,7 +19426,10 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
             case GGML_OP_CPY:
             case GGML_OP_DUP:
                 {
-                    if (ggml_is_quantized(node->type)) {
+                    if (ggml_is_quantized(node->type) ||
+                        // F16 -> BF16 and BF16 -> F16 copies go through intermediate F32
+                        (node->src[0]->type == GGML_TYPE_F16 && node->src[1] && node->src[1]->type == GGML_TYPE_BF16) ||
+                        (node->src[0]->type == GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == GGML_TYPE_F16)) {
                         cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
                     }
                 } break;

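The new work-buffer case exists because, as the comment notes, an F16 -> BF16 or BF16 -> F16 copy goes through an intermediate F32 buffer rather than converting directly, so the plan reserves ne[0] F32 values per thread, exactly as it already did for quantized destinations. For reference, a standalone sketch of why F32 is the natural intermediate (not ggml's exact conversion code; the real converters also handle rounding and NaN): BF16 is simply the upper 16 bits of an IEEE-754 binary32.

    #include <stdint.h>
    #include <string.h>

    /* Minimal sketch: widening BF16 -> F32 is a 16-bit shift; narrowing by
     * truncation just drops the low mantissa bits (real converters round). */
    typedef struct { uint16_t bits; } bf16_sketch_t;

    static float bf16_to_f32_sketch(bf16_sketch_t h) {
        uint32_t u = (uint32_t) h.bits << 16;
        float f;
        memcpy(&f, &u, sizeof f);
        return f;
    }

    static bf16_sketch_t f32_to_bf16_sketch(float f) {
        uint32_t u;
        memcpy(&u, &f, sizeof u);
        return (bf16_sketch_t) { .bits = (uint16_t) (u >> 16) };
    }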
gguf-py/gguf/constants.py

Lines changed: 1 addition & 1 deletion
@@ -864,7 +864,6 @@ def get_type(val: Any) -> GGUFValueType:
 GGML_QUANT_SIZES = {
     GGMLQuantizationType.F32: (1, 4),
     GGMLQuantizationType.F16: (1, 2),
-    GGMLQuantizationType.BF16: (1, 2),
     GGMLQuantizationType.Q4_0: (32, 2 + 16),
     GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
     GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
@@ -891,6 +890,7 @@ def get_type(val: Any) -> GGUFValueType:
     GGMLQuantizationType.I64: (1, 8),
     GGMLQuantizationType.F64: (1, 8),
     GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
+    GGMLQuantizationType.BF16: (1, 2),
 }

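Each GGML_QUANT_SIZES entry is a (block_size, type_size) pair, so BF16's (1, 2) means one element per block at two bytes each; the entry only moves to the end of the table, presumably to match BF16's position in the GGMLQuantizationType enum (it was added after IQ1_M). A small sketch of the arithmetic such a pair feeds, written in plain C rather than tied to any particular gguf-py helper:

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Bytes needed for a contiguous row of n_elems elements of a type
     * described by (block_size, type_size). */
    static size_t row_bytes(size_t n_elems, size_t block_size, size_t type_size) {
        assert(n_elems % block_size == 0);
        return n_elems / block_size * type_size;
    }

    int main(void) {
        printf("%zu\n", row_bytes(4096, 1, 2));       /* BF16 row: 8192 bytes */
        printf("%zu\n", row_bytes(4096, 32, 2 + 16)); /* Q4_0 row: 2304 bytes */
        return 0;
    }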
tests/test-backend-ops.cpp

Lines changed: 4 additions & 2 deletions
@@ -50,7 +50,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
 
     if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
-    } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
+    } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_BF16) {
         GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
         std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
         std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
@@ -92,6 +92,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                     size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0];
                     if (t->type == GGML_TYPE_F16) {
                         tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]));
+                    } else if (t->type == GGML_TYPE_BF16) {
+                        tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i]));
                     } else if (t->type == GGML_TYPE_F32) {
                         tv.push_back(*(float *) &buf[i]);
                     } else if (t->type == GGML_TYPE_I32) {
@@ -1864,7 +1866,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     std::default_random_engine rng(0);
 
     const ggml_type all_types[] = {
-        GGML_TYPE_F32, GGML_TYPE_F16,
+        GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16,
         GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
         GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
         GGML_TYPE_Q8_0,

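Adding GGML_TYPE_BF16 to all_types means the type-swept cases now also run in BF16: init_tensor_uniform fills BF16 tensors through the same quantize-from-F32 branch used for F16 and the quantized types, and tensor_to_float widens BF16 back to F32 for comparison. When reading tolerances, note that BF16 keeps only 8 significand bits (7 stored), so a round-trip loses up to about 2^-8 ≈ 0.4% relative precision. A standalone sketch, assuming the scalar helpers ggml_fp32_to_bf16() and ggml_bf16_to_fp32() declared in ggml.h:

    #include <math.h>
    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        float max_rel = 0.0f;
        for (int i = 1; i <= 10000; ++i) {
            const float x = 0.001f * (float) i; /* 0.001 .. 10.0 */
            const float y = ggml_bf16_to_fp32(ggml_fp32_to_bf16(x));
            const float rel = fabsf(y - x) / x;
            if (rel > max_rel) max_rel = rel;
        }
        /* expect a value near 2^-8 ~= 0.0039 with round-to-nearest conversion */
        printf("max relative round-trip error: %g\n", max_rel);
        return 0;
    }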