 typedef ggml_fp16_t half;
 
+static const std::shared_ptr<kp::Tensor> nullTensor = nullptr;
+
 static std::string ggml_kompute_format_name(int device) {
     return "Kompute" + std::to_string(device);
 }
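
Note on the hunk above: the null sentinel previously existed twice as a function-local static (inside ggml_vk_get_tensor and inside ggml_vk_graph_compute, both removed later in this diff); hoisting a single file-scope constant lets every helper and call site share it. A minimal self-contained sketch of the pattern, where Tensor and find_tensor are hypothetical stand-ins for kp::Tensor and the backend's lookup helper:

    #include <cstdio>
    #include <memory>

    struct Tensor {};  // stand-in for kp::Tensor

    // One shared "not found" value instead of a static local per function.
    static const std::shared_ptr<Tensor> nullTensor = nullptr;

    static const std::shared_ptr<Tensor> find_tensor(bool found) {
        return found ? std::make_shared<Tensor>() : nullTensor;
    }

    int main() {
        // Callers bind the result to a const reference and test it directly,
        // mirroring the GGML_ASSERT(res) checks later in the file.
        const std::shared_ptr<Tensor> & res = find_tensor(false);
        std::printf("%s\n", res ? "mapped" : "not mapped");
        return 0;
    }
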
@@ -586,31 +588,47 @@ ggml_vk_memory * ggml_vk_find_tensor(const struct ggml_tensor * t, uint64_t & of
 }
 
 static
-const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor(const struct ggml_tensor * t, uint32_t * alignedOffset = nullptr) {
-    uint64_t originalOffset = 0;
-    auto * res = ggml_vk_find_tensor(t, originalOffset);
+const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor_aligned(const struct ggml_tensor * t, uint32_t * aligned_offset) {
+    uint64_t original_offset = 0;
+    auto * res = ggml_vk_find_tensor(t, original_offset);
     if (!res) {
-        static std::shared_ptr<kp::Tensor> nullTensor = nullptr;
         return nullTensor;
     }
 
     // Create a tensor whose memory will be composed of our buffers at the correct offset
-    const size_t nelements = ggml_nelements(t);
     size_t nbytes = ggml_nbytes(t);
+    size_t vulkan_offset = ggml_vk_aligned_offset(t->buffer, original_offset);
+    *aligned_offset = original_offset - vulkan_offset;
+    nbytes += *aligned_offset;
+
+    return komputeManager()->tensor(
+        t->data,
+        ggml_nelements(t), nbytes,
+        kp::Tensor::TensorDataTypes::eFloat,
+        res->primaryMemory, res->primaryBuffer,
+        res->stagingMemory, res->stagingBuffer,
+        vulkan_offset);
+}
 
-    size_t vulkanOffset = ggml_vk_aligned_offset(t->buffer, originalOffset);
-    if (alignedOffset) {
-        *alignedOffset = originalOffset - vulkanOffset;
-        nbytes += *alignedOffset;
+static
+const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor_slice(const struct ggml_tensor * t, size_t offset, size_t nbytes) {
+    uint64_t tensor_offset = 0;
+    auto * res = ggml_vk_find_tensor(t, tensor_offset);
+    if (!res) {
+        return nullTensor;
     }
 
+    size_t elsz = ggml_element_size(t);
+    GGML_ASSERT(nbytes % elsz == 0);
+
+    // Create a tensor whose memory will be composed of our buffers at the correct offset
     return komputeManager()->tensor(
-        t->data,
-        nelements,
-        nbytes, kp::Tensor::TensorDataTypes::eFloat,
+        reinterpret_cast<char *>(t->data) + offset,
+        nbytes / elsz, nbytes,
+        kp::Tensor::TensorDataTypes::eFloat,
         res->primaryMemory, res->primaryBuffer,
         res->stagingMemory, res->stagingBuffer,
-        vulkanOffset);
+        tensor_offset + offset);
 }
 
 static std::vector<uint32_t> getSpirvShader(const unsigned char * rawData, size_t size) {
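
The hunk above splits the old ggml_vk_get_tensor into two helpers: ggml_vk_get_tensor_aligned binds the whole tensor starting at the nearest alignment boundary at or below its real offset and reports the leftover bytes through aligned_offset (the off_src0/off_src1/off_dst values consumed below), while ggml_vk_get_tensor_slice binds only the byte range [offset, offset + nbytes) of a tensor. A worked sketch of the aligned variant's arithmetic, assuming an illustrative 256-byte alignment (the real value comes from ggml_vk_aligned_offset and the device):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint64_t alignment       = 256;  // assumed device alignment, illustrative
        const uint64_t original_offset = 4192; // tensor's byte offset in the pooled buffer

        // Round the binding point down to the alignment boundary; this matches
        // what ggml_vk_aligned_offset must do for aligned_offset to be non-negative.
        const uint64_t vulkan_offset  = original_offset / alignment * alignment;       // 4096
        const uint32_t aligned_offset = (uint32_t)(original_offset - vulkan_offset);   // 96

        // The kp::Tensor is bound at vulkan_offset, nbytes grows by aligned_offset
        // so the tensor's tail stays inside the binding, and the caller gets
        // aligned_offset back to add when addressing the data.
        assert(vulkan_offset + aligned_offset == original_offset);
        std::printf("bind at %llu, residual offset %u\n",
                    (unsigned long long) vulkan_offset, aligned_offset);
        return 0;
    }
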
@@ -1561,13 +1579,12 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
         const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
         const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
 
-        const static std::shared_ptr<kp::Tensor> nullTensor = nullptr;
         uint32_t off_src0 = 0;
         uint32_t off_src1 = 0;
         uint32_t off_dst  = 0;
-        const std::shared_ptr<kp::Tensor>& id_src0 = src0 ? ggml_vk_get_tensor(src0, &off_src0) : nullTensor;
-        const std::shared_ptr<kp::Tensor>& id_src1 = src1 ? ggml_vk_get_tensor(src1, &off_src1) : nullTensor;
-        const std::shared_ptr<kp::Tensor>& id_dst  = dst  ? ggml_vk_get_tensor(dst,  &off_dst)  : nullTensor;
+        const std::shared_ptr<kp::Tensor>& id_src0 = src0 ? ggml_vk_get_tensor_aligned(src0, &off_src0) : nullTensor;
+        const std::shared_ptr<kp::Tensor>& id_src1 = src1 ? ggml_vk_get_tensor_aligned(src1, &off_src1) : nullTensor;
+        const std::shared_ptr<kp::Tensor>& id_dst  = dst  ? ggml_vk_get_tensor_aligned(dst,  &off_dst)  : nullTensor;
 
         switch (dst->op) {
             case GGML_OP_ADD:
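
With the sentinel hoisted to file scope, the per-function static is gone and these call sites only change the helper's name. One C++ detail worth noting here: binding const std::shared_ptr<kp::Tensor>& to the ternary is safe because both arms yield a shared_ptr of the same type, and the temporary materialized when the function arm is chosen has its lifetime extended to that of the reference. A minimal sketch with stand-in types:

    #include <cstdint>
    #include <memory>

    struct Tensor {};  // stand-in for kp::Tensor
    static const std::shared_ptr<Tensor> nullTensor = nullptr;

    static const std::shared_ptr<Tensor> get_tensor_aligned(uint32_t * off) {
        *off = 0;
        return std::make_shared<Tensor>();
    }

    int main() {
        const Tensor * src0 = nullptr;  // pretend this op has no first operand
        uint32_t off_src0 = 0;
        // The temporary produced by the ternary lives as long as id_src0 does.
        const std::shared_ptr<Tensor> & id_src0 =
            src0 ? get_tensor_aligned(&off_src0) : nullTensor;
        return id_src0 ? 1 : 0;
    }
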
@@ -1900,7 +1917,7 @@ static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer)
 static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     GGML_UNUSED(buffer);
 
-    const auto res = ggml_vk_get_tensor(tensor);
+    const auto res = ggml_vk_get_tensor_slice(tensor, offset, size);
     GGML_ASSERT(res);
 
     memcpy((char *) tensor->data + offset, data, size);
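
set_tensor now maps only the byte range it is about to write instead of the whole tensor, so the host-to-device sync later in the full function (outside this hunk) only has to move size bytes. A worked sketch of the slice arithmetic inside ggml_vk_get_tensor_slice, with illustrative numbers:

    #include <cassert>
    #include <cstddef>

    int main() {
        const size_t elsz          = 4;    // ggml_element_size(t), e.g. an f32 tensor
        const size_t tensor_offset = 1024; // t's position in its pooled buffer
        const size_t offset        = 512;  // caller's byte offset into t
        const size_t nbytes        = 2048; // caller's byte count

        assert(nbytes % elsz == 0);        // mirrors the GGML_ASSERT in the helper

        const size_t nelements   = nbytes / elsz;          // element count handed to Kompute
        const size_t bind_offset = tensor_offset + offset; // slice start within the pool

        assert(nelements == 512 && bind_offset == 1536);
        return 0;
    }
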
@@ -1911,7 +1928,7 @@ static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer,
 static void ggml_backend_kompute_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     GGML_UNUSED(buffer);
 
-    const auto res = ggml_vk_get_tensor(tensor);
+    const auto res = ggml_vk_get_tensor_slice(tensor, offset, size);
     GGML_ASSERT(res);
 
     komputeManager()->sequence()->eval<kp::OpTensorSyncLocal>({res});
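
get_tensor benefits the same way: the eval<kp::OpTensorSyncLocal> shown above copies only the requested slice from device memory into host-visible staging memory before the host read. A standalone sketch of the two sync directions, assuming Kompute v0.8-style APIs (the backend's komputeManager() wraps a kp::Manager):

    #include <kompute/Kompute.hpp>
    #include <vector>

    int main() {
        kp::Manager mgr;
        auto t = mgr.tensor(std::vector<float>{1.f, 2.f, 3.f});

        // Host -> device: push CPU-side contents into GPU memory
        // (the direction set_tensor needs after its memcpy).
        mgr.sequence()->eval<kp::OpTensorSyncDevice>({t});

        // Device -> host: make GPU-side results visible to the CPU,
        // which is what eval<kp::OpTensorSyncLocal>({res}) does above.
        mgr.sequence()->eval<kp::OpTensorSyncLocal>({t});
        return 0;
    }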