Skip to content

Commit 397b1f8

Browse files
committed
vulkan : add dev notes
1 parent 536983b commit 397b1f8

File tree

2 files changed: 11 additions (+11) and 1 deletion (-1)

ggml-kompute.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1559,12 +1559,19 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
1559 1559   case GGML_OP_SOFT_MAX:
1560 1560   {
1561 1561   float scale;
1562      - memcpy(&scale, dst->op_params, sizeof(float));
     1562 + float max_bias;
     1563 +
     1564 + memcpy(&scale, (float *)dst->op_params + 0, sizeof(float));
     1565 + memcpy(&max_bias, (float *)dst->op_params + 1, sizeof(float));
1563 1566
1564 1567   #pragma message("TODO: add ggml_vk_soft_max() F16 src1 support")
1565 1568   #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5021")
1566 1569   GGML_ASSERT(!src1 || src1t == GGML_TYPE_F32);
1567 1570
     1571 + #pragma message("TODO: add ALiBi support")
     1572 + #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/7192")
     1573 + GGML_ASSERT(max_bias == 0.0f);
     1574 +
1568 1575   ggml_vk_soft_max(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, ne01, ne02, ne03, scale);
1569 1576   } break;
1570 1577   case GGML_OP_DIAG_MASK_INF:

ggml-vulkan.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4285,6 +4285,9 @@ static void ggml_vk_soft_max(ggml_backend_vk_context * ctx, vk_context * subctx,
4285 4285   const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
4286 4286   const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
4287 4287
     4288 + #pragma message("TODO: src2 is no longer used in soft_max - should be removed and ALiBi calculation should be updated")
     4289 + #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/7192")
     4290 +
4288 4291   ggml_vk_op_f32<vk_op_soft_max_push_constants>(ctx, subctx, src0, src1, src2, dst, GGML_OP_SOFT_MAX, {
4289 4292   ncols,
4290 4293   src1 != nullptr ? nrows_y : (uint32_t)0,

0 commit comments

Comments
 (0)