@@ -63,5 +63,7 @@
 typedef ggml_fp16_t half;
 
+static const std::shared_ptr<kp::Tensor> nullTensor = nullptr;
+
 static std::string ggml_kompute_format_name(int device) {
     return "Kompute" + std::to_string(device);
 }
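
Note: nullTensor is hoisted from function-local statics (previously one copy inside ggml_vk_get_tensor and another in ggml_vk_graph_compute, both removed further down) to a single file-scope constant, so all the lookup helpers below can return the same sentinel. A minimal self-contained sketch of the pattern, with a placeholder type standing in for kp::Tensor:

    // Minimal sketch of the shared null-sentinel pattern (placeholder type,
    // not the real kp::Tensor): one file-scope null shared_ptr serves every
    // lookup helper's "not found" return instead of per-function statics.
    #include <memory>

    struct Tensor { };

    static const std::shared_ptr<Tensor> nullTensor = nullptr;

    static const std::shared_ptr<Tensor> find_tensor(bool found) {
        if (!found) {
            return nullTensor;          // same sentinel for every caller
        }
        return std::make_shared<Tensor>();
    }

    int main() {
        return find_tensor(false) ? 1 : 0;  // null sentinel converts to false
    }
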
@@ -585,31 +587,47 @@ ggml_vk_memory * ggml_vk_find_tensor(const struct ggml_tensor * t, uint64_t & of
 }
 
 static
-const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor(const struct ggml_tensor * t, uint32_t * alignedOffset = nullptr) {
-    uint64_t originalOffset = 0;
-    auto * res = ggml_vk_find_tensor(t, originalOffset);
+const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor_aligned(const struct ggml_tensor * t, uint32_t * aligned_offset) {
+    uint64_t original_offset = 0;
+    auto * res = ggml_vk_find_tensor(t, original_offset);
     if (!res) {
-        static std::shared_ptr<kp::Tensor> nullTensor = nullptr;
         return nullTensor;
     }
 
     // Create a tensor whose memory will be composed of our buffers at the correct offset
-    const size_t nelements = ggml_nelements(t);
     size_t nbytes = ggml_nbytes(t);
+    size_t vulkan_offset = ggml_vk_aligned_offset(t->buffer, original_offset);
+    *aligned_offset = original_offset - vulkan_offset;
+    nbytes += *aligned_offset;
+
+    return komputeManager()->tensor(
+        t->data,
+        ggml_nelements(t), nbytes,
+        kp::Tensor::TensorDataTypes::eFloat,
+        res->primaryMemory, res->primaryBuffer,
+        res->stagingMemory, res->stagingBuffer,
+        vulkan_offset);
+}
 
-    size_t vulkanOffset = ggml_vk_aligned_offset(t->buffer, originalOffset);
-    if (alignedOffset) {
-        *alignedOffset = originalOffset - vulkanOffset;
-        nbytes += *alignedOffset;
+static
+const std::shared_ptr<kp::Tensor> ggml_vk_get_tensor_slice(const struct ggml_tensor * t, size_t offset, size_t nbytes) {
+    uint64_t tensor_offset = 0;
+    auto * res = ggml_vk_find_tensor(t, tensor_offset);
+    if (!res) {
+        return nullTensor;
     }
 
+    size_t elsz = ggml_element_size(t);
+    GGML_ASSERT(nbytes % elsz == 0);
+
+    // Create a tensor whose memory will be composed of our buffers at the correct offset
     return komputeManager()->tensor(
-        t->data,
-        nelements,
-        nbytes, kp::Tensor::TensorDataTypes::eFloat,
+        reinterpret_cast<char *>(t->data) + offset,
+        nbytes / elsz, nbytes,
+        kp::Tensor::TensorDataTypes::eFloat,
         res->primaryMemory, res->primaryBuffer,
         res->stagingMemory, res->stagingBuffer,
-        vulkanOffset);
+        tensor_offset + offset);
 }
 
 static std::vector<uint32_t> getSpirvShader(const unsigned char * rawData, size_t size) {
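
To make the two new lookup paths concrete: ggml_vk_get_tensor_aligned rounds the view's start down to a buffer-offset alignment (Vulkan requires storage-buffer binding offsets to respect the device's minimum alignment) and reports the leftover byte slack through aligned_offset, while ggml_vk_get_tensor_slice binds exactly the requested byte window at tensor_offset + offset. A self-contained sketch of both computations, assuming a hypothetical 256-byte alignment, an f32 tensor, and that ggml_vk_aligned_offset rounds down:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
        // Aligned view (mirrors ggml_vk_get_tensor_aligned). Assumed values:
        const uint64_t align           = 256;  // device buffer-offset alignment
        const uint64_t original_offset = 1000; // tensor's byte offset in the pool

        const uint64_t vulkan_offset  = original_offset & ~(align - 1);      // 768
        const uint32_t aligned_offset =
            (uint32_t)(original_offset - vulkan_offset);                     // 232
        const uint64_t nbytes_aligned = 4096 + aligned_offset; // view grows by the slack

        printf("bind at %llu, kernel skips %u bytes, view spans %llu bytes\n",
               (unsigned long long)vulkan_offset, aligned_offset,
               (unsigned long long)nbytes_aligned);

        // Byte-exact slice (mirrors ggml_vk_get_tensor_slice). Assumed values:
        const uint64_t tensor_offset = 4096; // tensor's byte offset in the pool
        const uint64_t offset        = 512;  // caller-requested offset into the tensor
        const uint64_t nbytes        = 1024; // caller-requested byte count
        const uint64_t elsz          = 4;    // sizeof(float) for an f32 tensor

        assert(nbytes % elsz == 0);          // same invariant as the GGML_ASSERT
        printf("slice: %llu elements at pool offset %llu\n",
               (unsigned long long)(nbytes / elsz),
               (unsigned long long)(tensor_offset + offset));
        return 0;
    }

The split clarifies intent: compute dispatches need the aligned view plus the slack as an in-kernel offset, whereas the host-side set/get paths below only need the exact window, presumably because sync copies go through staging memory rather than a shader binding and so carry no alignment requirement.
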
@@ -1546,13 +1564,12 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
     const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
     const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
 
-    const static std::shared_ptr<kp::Tensor> nullTensor = nullptr;
     uint32_t off_src0 = 0;
     uint32_t off_src1 = 0;
     uint32_t off_dst = 0;
-    const std::shared_ptr<kp::Tensor>& id_src0 = src0 ? ggml_vk_get_tensor(src0, &off_src0) : nullTensor;
-    const std::shared_ptr<kp::Tensor>& id_src1 = src1 ? ggml_vk_get_tensor(src1, &off_src1) : nullTensor;
-    const std::shared_ptr<kp::Tensor>& id_dst = dst ? ggml_vk_get_tensor(dst, &off_dst) : nullTensor;
+    const std::shared_ptr<kp::Tensor>& id_src0 = src0 ? ggml_vk_get_tensor_aligned(src0, &off_src0) : nullTensor;
+    const std::shared_ptr<kp::Tensor>& id_src1 = src1 ? ggml_vk_get_tensor_aligned(src1, &off_src1) : nullTensor;
+    const std::shared_ptr<kp::Tensor>& id_dst = dst ? ggml_vk_get_tensor_aligned(dst, &off_dst) : nullTensor;
 
     switch (dst->op) {
         case GGML_OP_ADD:
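
The call sites switch to the aligned variant and keep collecting the per-tensor byte slack in off_src0/off_src1/off_dst, which the operation handlers fold into the kernel's indexing. A hypothetical, self-contained sketch of that downstream pattern (the helper name and push-constant layout are illustrative, not from this file):

    #include <cstdint>
    #include <cstdio>

    // Illustrative only: convert the byte slack reported by the aligned
    // lookup into element offsets that an f32 kernel can add to its indices.
    static void dispatch_add(uint32_t off_src0, uint32_t off_src1, uint32_t off_dst) {
        const uint32_t in0_off = (uint32_t)(off_src0 / sizeof(float));
        const uint32_t in1_off = (uint32_t)(off_src1 / sizeof(float));
        const uint32_t out_off = (uint32_t)(off_dst  / sizeof(float));
        printf("push constants: %u %u %u\n", in0_off, in1_off, out_off);
    }

    int main() {
        dispatch_add(232, 0, 64); // hypothetical slack values
        return 0;
    }
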
@@ -1865,7 +1882,7 @@ static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer)
 static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     GGML_UNUSED(buffer);
 
-    const auto res = ggml_vk_get_tensor(tensor);
+    const auto res = ggml_vk_get_tensor_slice(tensor, offset, size);
     GGML_ASSERT(res);
 
     memcpy((char *)tensor->data + offset, data, size);
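
The hunk stops at the memcpy into the mapped host pointer. The continuation is not shown in this diff; by symmetry with the get_tensor path below, a plausible next step (an assumption, not confirmed by the hunk) is to push only the written slice to device memory:

    // Assumed continuation, not visible in this hunk: sync just the slice
    // (not the whole tensor) from host-visible memory to the device.
    komputeManager()->sequence()->eval<kp::OpTensorSyncDevice>({res});
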
@@ -1876,7 +1893,7 @@ static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer,
 static void ggml_backend_kompute_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     GGML_UNUSED(buffer);
 
-    const auto res = ggml_vk_get_tensor(tensor);
+    const auto res = ggml_vk_get_tensor_slice(tensor, offset, size);
     GGML_ASSERT(res);
 
     komputeManager()->sequence()->eval<kp::OpTensorSyncLocal>({res});
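
Here OpTensorSyncLocal pulls the slice back into host-visible memory before reading. Since the kp::Tensor now wraps (char *)tensor->data + offset directly, only the requested window moves; a plausible continuation beyond the hunk (again an assumption) copies it out to the caller:

    // Assumed continuation, not visible in this hunk: hand the synced
    // window back to the caller's buffer.
    memcpy(data, (const char *)tensor->data + offset, size);
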