Skip to content

Commit 6be1fda

Browse files
committed
kompute : make partial tensor copies faster by syncing less data (#15)
1 parent 9a3674d commit 6be1fda

File tree

1 file changed

+36
-19
lines changed

1 file changed

+36
-19
lines changed

ggml/src/ggml-kompute.cpp

Lines changed: 36 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,8 @@
6363

6464
typedef ggml_fp16_t half;
6565

66+
static const std::shared_ptr<kp::Tensor> nullTensor = nullptr;
67+
6668
static std::string ggml_kompute_format_name(int device) {
6769
return "Kompute" + std::to_string(device);
6870
}
@@ -586,31 +588,47 @@ ggml_vk_memory * ggml_vk_find_tensor(const struct ggml_tensor * t, uint64_t & of
586588
}
587589

588590
static
589-
const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor(const struct ggml_tensor * t, uint32_t * alignedOffset = nullptr) {
590-
uint64_t originalOffset = 0;
591-
auto * res = ggml_vk_find_tensor(t, originalOffset);
591+
const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor_aligned(const struct ggml_tensor * t, uint32_t * aligned_offset) {
592+
uint64_t original_offset = 0;
593+
auto * res = ggml_vk_find_tensor(t, original_offset);
592594
if (!res) {
593-
static std::shared_ptr<kp::Tensor> nullTensor = nullptr;
594595
return nullTensor;
595596
}
596597

597598
// Create a tensor whose memory will be composed of our buffers at the correct offset
598-
const size_t nelements = ggml_nelements(t);
599599
size_t nbytes = ggml_nbytes(t);
600+
size_t vulkan_offset = ggml_vk_aligned_offset(t->buffer, original_offset);
601+
*aligned_offset = original_offset - vulkan_offset;
602+
nbytes += *aligned_offset;
603+
604+
return komputeManager()->tensor(
605+
t->data,
606+
ggml_nelements(t), nbytes,
607+
kp::Tensor::TensorDataTypes::eFloat,
608+
res->primaryMemory, res->primaryBuffer,
609+
res->stagingMemory, res->stagingBuffer,
610+
vulkan_offset);
611+
}
600612

601-
size_t vulkanOffset = ggml_vk_aligned_offset(t->buffer, originalOffset);
602-
if (alignedOffset) {
603-
*alignedOffset = originalOffset - vulkanOffset;
604-
nbytes += *alignedOffset;
613+
static
614+
const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor_slice(const struct ggml_tensor * t, size_t offset, size_t nbytes) {
615+
uint64_t tensor_offset = 0;
616+
auto * res = ggml_vk_find_tensor(t, tensor_offset);
617+
if (!res) {
618+
return nullTensor;
605619
}
606620

621+
size_t elsz = ggml_element_size(t);
622+
GGML_ASSERT(nbytes % elsz == 0);
623+
624+
// Create a tensor whose memory will be composed of our buffers at the correct offset
607625
return komputeManager()->tensor(
608-
t->data,
609-
nelements,
610-
nbytes, kp::Tensor::TensorDataTypes::eFloat,
626+
reinterpret_cast<char *>(t->data) + offset,
627+
nbytes / elsz, nbytes,
628+
kp::Tensor::TensorDataTypes::eFloat,
611629
res->primaryMemory, res->primaryBuffer,
612630
res->stagingMemory, res->stagingBuffer,
613-
vulkanOffset);
631+
tensor_offset + offset);
614632
}
615633

616634
static std::vector<uint32_t> getSpirvShader(const unsigned char* rawData, size_t size) {
@@ -1561,13 +1579,12 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
15611579
const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
15621580
const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
15631581

1564-
const static std::shared_ptr<kp::Tensor> nullTensor = nullptr;
15651582
uint32_t off_src0 = 0;
15661583
uint32_t off_src1 = 0;
15671584
uint32_t off_dst = 0;
1568-
const std::shared_ptr<kp::Tensor>& id_src0 = src0 ? ggml_vk_get_tensor(src0, &off_src0) : nullTensor;
1569-
const std::shared_ptr<kp::Tensor>& id_src1 = src1 ? ggml_vk_get_tensor(src1, &off_src1) : nullTensor;
1570-
const std::shared_ptr<kp::Tensor>& id_dst = dst ? ggml_vk_get_tensor(dst, &off_dst) : nullTensor;
1585+
const std::shared_ptr<kp::Tensor>& id_src0 = src0 ? ggml_vk_get_tensor_aligned(src0, &off_src0) : nullTensor;
1586+
const std::shared_ptr<kp::Tensor>& id_src1 = src1 ? ggml_vk_get_tensor_aligned(src1, &off_src1) : nullTensor;
1587+
const std::shared_ptr<kp::Tensor>& id_dst = dst ? ggml_vk_get_tensor_aligned(dst, &off_dst) : nullTensor;
15711588

15721589
switch (dst->op) {
15731590
case GGML_OP_ADD:
@@ -1900,7 +1917,7 @@ static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer)
19001917
static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
19011918
GGML_UNUSED(buffer);
19021919

1903-
const auto res = ggml_vk_get_tensor(tensor);
1920+
const auto res = ggml_vk_get_tensor_slice(tensor, offset, size);
19041921
GGML_ASSERT(res);
19051922

19061923
memcpy((char *)tensor->data + offset, data, size);
@@ -1911,7 +1928,7 @@ static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer,
19111928
static void ggml_backend_kompute_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
19121929
GGML_UNUSED(buffer);
19131930

1914-
const auto res = ggml_vk_get_tensor(tensor);
1931+
const auto res = ggml_vk_get_tensor_slice(tensor, offset, size);
19151932
GGML_ASSERT(res);
19161933

19171934
komputeManager()->sequence()->eval<kp::OpTensorSyncLocal>({res});

0 commit comments

Comments
 (0)