Commit c21a896

[CANN]: Fix ggml_backend_cann_buffer_get_tensor (#8871)
* cann: fix ggml_backend_cann_buffer_get_tensor
  1. fix data ptr offset
  2. enable the acquisition of incomplete tensors
* fix backend cann set_tensor
1 parent d4ff847 commit c21a896
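In effect, the offset argument of the CANN buffer's set_tensor/get_tensor path now indexes the tensor's device storage rather than the caller's host buffer, and dropping GGML_ASSERT(size == ggml_nbytes(tensor)) allows reading or writing just a slice of a tensor. A minimal sketch of the partial read this enables through the public ggml-backend API (illustrative only; the helper name and the assumption that the tensor already lives in a CANN buffer are not part of the commit):

// Sketch: read only rows [2, 4) of a contiguous 2-D F32 tensor held by a
// CANN backend buffer. Helper name and setup are assumptions for illustration.
#include "ggml.h"
#include "ggml-backend.h"
#include <vector>

static void read_two_rows(const struct ggml_tensor * tensor) {
    const size_t row_bytes = tensor->nb[1];          // byte stride of one row
    std::vector<char> host(2 * row_bytes);
    // Before this commit the CANN buffer asserted size == ggml_nbytes(tensor)
    // and applied the offset to the host pointer; a slice now reads correctly.
    ggml_backend_tensor_get(tensor, host.data(), /*offset=*/2 * row_bytes,
                            /*size=*/host.size());
}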

File tree

1 file changed: +38 additions, -43 deletions

ggml/src/ggml-cann.cpp

Lines changed: 38 additions & 43 deletions
@@ -896,34 +896,32 @@ GGML_CALL static void ggml_backend_cann_buffer_init_tensor(
  * @param size Size of the data to be copied, in bytes.
  */
 GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
-    ggml_backend_buffer_t buffer, ggml_tensor* tensor, const void* data,
+    ggml_backend_buffer_t buffer, ggml_tensor *tensor, const void *data,
     size_t offset, size_t size) {
-    // GGML_ASSERT(size == ggml_nbytes(tensor));
-    ggml_backend_cann_buffer_context* ctx =
-        (ggml_backend_cann_buffer_context*)buffer->context;
+    ggml_backend_cann_buffer_context *ctx =
+        (ggml_backend_cann_buffer_context *)buffer->context;
 
     ggml_cann_set_device(ctx->device);
     // TODO: refer to cann(#6017), it use thread's default stream.
     // For acl, synchronous functions use this default stream.
     // Why aclrtSynchronizeDevice?
 
     if (!need_transform(tensor->type)) {
-        ACL_CHECK(aclrtMemcpy(tensor->data, size, (const char*)data + offset,
-                              size, ACL_MEMCPY_HOST_TO_DEVICE));
+        ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size, data, size,
+                              ACL_MEMCPY_HOST_TO_DEVICE));
     } else {
-        void* transform_buffer = malloc(size);
-        ggml_backend_cann_transform(tensor, (const char*)data + offset,
-                                    transform_buffer);
+        void *transform_buffer = malloc(size);
+        ggml_backend_cann_transform(tensor, data, transform_buffer);
 
 #ifndef NDEBUG
-        void* check_buffer = malloc(size);
+        void *check_buffer = malloc(size);
         ggml_backend_cann_transform_back(tensor, transform_buffer,
                                          check_buffer);
-        GGML_ASSERT(memcmp((const char*)data + offset, check_buffer, size) ==
-                    0);
+        GGML_ASSERT(memcmp(data, check_buffer, size) == 0);
         free(check_buffer);
 #endif
-        ACL_CHECK(aclrtMemcpy(tensor->data, size, transform_buffer, size,
+        ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
+                              transform_buffer, size,
                               ACL_MEMCPY_HOST_TO_DEVICE));
         free(transform_buffer);
     }
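For reference, the corrected host-to-device calls follow this convention: the offset is applied on the tensor (destination) side, and the host buffer is consumed from its start. A plain-memcpy restatement of that convention (an illustrative sketch, not the backend code):

#include <cstddef>
#include <cstring>

// dev_base stands in for tensor->data, host for the `data` argument of
// set_tensor; the offset indexes the device tensor, never the host buffer.
static void set_tensor_semantics(char * dev_base, const char * host,
                                 size_t offset, size_t size) {
    std::memcpy(dev_base + offset, host, size);
}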
@@ -945,21 +943,20 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
 GGML_CALL static void ggml_backend_cann_buffer_get_tensor(
     ggml_backend_buffer_t buffer, const ggml_tensor* tensor, void* data,
     size_t offset, size_t size) {
-    GGML_ASSERT(size == ggml_nbytes(tensor));
     ggml_backend_cann_buffer_context* ctx =
         (ggml_backend_cann_buffer_context*)buffer->context;
 
     ggml_cann_set_device(ctx->device);
 
     if (!need_transform(tensor->type)) {
-        ACL_CHECK(aclrtMemcpy((char*)data + offset, size, tensor->data, size,
+        ACL_CHECK(aclrtMemcpy(data, size, (char*)tensor->data + offset, size,
                               ACL_MEMCPY_DEVICE_TO_HOST));
     } else {
         void* transform_buffer = malloc(size);
-        ACL_CHECK(aclrtMemcpy(transform_buffer, size, tensor->data, size,
+        ACL_CHECK(aclrtMemcpy(transform_buffer, size,
+                              (char*)tensor->data + offset, size,
                               ACL_MEMCPY_DEVICE_TO_HOST));
-        ggml_backend_cann_transform_back(tensor, transform_buffer,
-                                         (char*)data + offset);
+        ggml_backend_cann_transform_back(tensor, transform_buffer, data);
         free(transform_buffer);
     }
 }
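With the whole-tensor assert removed, reading a tensor in pieces should return exactly the same bytes as one full read, which gives a simple way to sanity-check the fix for a plain (non-quantized) F32 tensor. The check below is hypothetical and not part of the commit or its tests:

#include "ggml.h"
#include "ggml-backend.h"
#include <cassert>
#include <cstring>
#include <vector>

// Hypothetical sanity check: a whole-tensor read must match the same data
// fetched as two half reads once offsets index the device storage correctly.
static void check_split_read(const struct ggml_tensor * tensor) {
    const size_t nbytes = ggml_nbytes(tensor);
    const size_t half   = nbytes / 2;

    std::vector<char> whole(nbytes), split(nbytes);
    ggml_backend_tensor_get(tensor, whole.data(), 0, nbytes);
    ggml_backend_tensor_get(tensor, split.data(), 0, half);
    ggml_backend_tensor_get(tensor, split.data() + half, half, nbytes - half);

    assert(std::memcmp(whole.data(), split.data(), nbytes) == 0);
}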
@@ -1448,60 +1445,58 @@ ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
  * @param size Size of the data to copy in bytes.
  */
 GGML_CALL static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
-                                                         ggml_tensor* tensor,
-                                                         const void* data,
+                                                         ggml_tensor *tensor,
+                                                         const void *data,
                                                          size_t offset,
                                                          size_t size) {
-    ggml_backend_cann_context* cann_ctx =
-        (ggml_backend_cann_context*)backend->context;
+    ggml_backend_cann_context *cann_ctx =
+        (ggml_backend_cann_context *)backend->context;
 
     if (!need_transform(tensor->type)) {
-        ACL_CHECK(aclrtMemcpyAsync(
-            tensor->data, size, (const char*)data + offset, size,
-            ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
+        ACL_CHECK(aclrtMemcpyAsync((char *)tensor->data + offset, size, data,
+                                   size, ACL_MEMCPY_HOST_TO_DEVICE,
+                                   cann_ctx->stream()));
     } else {
-        void* transform_buffer = malloc(size);
-        ggml_backend_cann_transform(tensor, (const char*)data + offset,
-                                    transform_buffer);
+        void *transform_buffer = malloc(size);
+        ggml_backend_cann_transform(tensor, data, transform_buffer);
 
 #ifndef NDEBUG
-        void* check_buffer = malloc(size);
+        void *check_buffer = malloc(size);
         ggml_backend_cann_transform_back(tensor, transform_buffer,
                                          check_buffer);
-        GGML_ASSERT(memcmp((const char*)data + offset, check_buffer, size));
+        GGML_ASSERT(memcmp(data, check_buffer, size));
         free(check_buffer);
 #endif
-        ACL_CHECK(aclrtMemcpyAsync(tensor->data, size, transform_buffer, size,
-                                   ACL_MEMCPY_HOST_TO_DEVICE,
-                                   cann_ctx->stream()));
+        ACL_CHECK(aclrtMemcpyAsync(
+            (char *)tensor->data + offset, size, transform_buffer, size,
+            ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
         ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
         free(transform_buffer);
     }
 }
 
 GGML_CALL static void ggml_backend_cann_get_tensor_async(
-    ggml_backend_t backend, const ggml_tensor* tensor, void* data,
+    ggml_backend_t backend, const ggml_tensor *tensor, void *data,
     size_t offset, size_t size) {
-    ggml_backend_cann_context* cann_ctx =
-        (ggml_backend_cann_context*)backend->context;
+    ggml_backend_cann_context *cann_ctx =
+        (ggml_backend_cann_context *)backend->context;
     ggml_backend_buffer_t buf =
         tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
     GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) &&
                 "unsupported buffer type");
 
     if (!need_transform(tensor->type)) {
-        ACL_CHECK(aclrtMemcpyAsync((char*)data + offset, size, tensor->data,
+        ACL_CHECK(aclrtMemcpyAsync(data, size, (char *)tensor->data + offset,
                                    size, ACL_MEMCPY_DEVICE_TO_HOST,
                                    cann_ctx->stream()));
     } else {
-        void* transform_buffer = malloc(size);
-        ACL_CHECK(aclrtMemcpyAsync(transform_buffer, size, tensor->data, size,
-                                   ACL_MEMCPY_DEVICE_TO_HOST,
-                                   cann_ctx->stream()));
+        void *transform_buffer = malloc(size);
+        ACL_CHECK(aclrtMemcpyAsync(
+            transform_buffer, size, (char *)tensor->data + offset, size,
+            ACL_MEMCPY_DEVICE_TO_HOST, cann_ctx->stream()));
         ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
-        ggml_backend_cann_transform_back(tensor, transform_buffer,
-                                         (char*)data + offset);
+        ggml_backend_cann_transform_back(tensor, transform_buffer, data);
        free(transform_buffer);
     }
 }
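The async variants apply the same offset convention on the backend's stream. A hedged usage sketch of an offset upload through the public async API (helper name and setup are assumptions, not part of the commit); note the synchronize before the host buffer is reused:

#include "ggml.h"
#include "ggml-backend.h"
#include <vector>

// Hypothetical helper: asynchronously overwrite row `row` of a contiguous 2-D
// F32 tensor held by a CANN backend, then wait for the copy to complete.
static void upload_row_async(ggml_backend_t backend, struct ggml_tensor * tensor,
                             const std::vector<float> & row_data, size_t row) {
    const size_t row_bytes = tensor->nb[1];          // row_data must hold one full row
    ggml_backend_tensor_set_async(backend, tensor, row_data.data(),
                                  /*offset=*/row * row_bytes,
                                  /*size=*/row_bytes);
    ggml_backend_synchronize(backend);               // safe to reuse row_data after this
}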
