@@ -896,34 +896,32 @@ GGML_CALL static void ggml_backend_cann_buffer_init_tensor(
896
896
* @param size Size of the data to be copied, in bytes.
897
897
*/
898
898
// Copy `size` bytes of host memory `data` into `tensor`'s device storage,
// starting `offset` bytes into the tensor's data. Types flagged by
// need_transform() are first converted on the host via
// ggml_backend_cann_transform() and the converted bytes are uploaded instead.
// NOTE(review): assumes `size`/`offset` stay within the tensor's allocation —
// callers are expected to guarantee this; verify against ggml-backend.c.
GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
    ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data,
    size_t offset, size_t size) {
    ggml_backend_cann_buffer_context * ctx =
        (ggml_backend_cann_buffer_context *)buffer->context;

    ggml_cann_set_device(ctx->device);
    // TODO: refer to cann(#6017), it use thread's default stream.
    // For acl, synchronous functions use this default stream.
    // Why aclrtSynchronizeDevice?

    if (!need_transform(tensor->type)) {
        // Direct synchronous host-to-device copy into the tensor at `offset`.
        ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size, data, size,
                              ACL_MEMCPY_HOST_TO_DEVICE));
    } else {
        // Convert into a temporary host buffer, then upload the result.
        void * transform_buffer = malloc(size);
        ggml_backend_cann_transform(tensor, data, transform_buffer);

#ifndef NDEBUG
        // Debug builds: round-trip the transform to verify it is lossless.
        void * check_buffer = malloc(size);
        ggml_backend_cann_transform_back(tensor, transform_buffer,
                                         check_buffer);
        GGML_ASSERT(memcmp(data, check_buffer, size) == 0);
        free(check_buffer);
#endif
        ACL_CHECK(aclrtMemcpy((char *)tensor->data + offset, size,
                              transform_buffer, size,
                              ACL_MEMCPY_HOST_TO_DEVICE));
        free(transform_buffer);
    }
}
@@ -945,21 +943,20 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor(
945
943
// Copy `size` bytes out of `tensor`'s device storage (starting at byte
// `offset`) into host memory `data`. For types flagged by need_transform(),
// the raw device bytes are downloaded into a temporary host buffer and
// converted back with ggml_backend_cann_transform_back() before being
// delivered to the caller.
GGML_CALL static void ggml_backend_cann_buffer_get_tensor(
    ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data,
    size_t offset, size_t size) {
    ggml_backend_cann_buffer_context * ctx =
        (ggml_backend_cann_buffer_context *)buffer->context;

    ggml_cann_set_device(ctx->device);

    if (!need_transform(tensor->type)) {
        // Direct synchronous device-to-host copy from the tensor at `offset`.
        ACL_CHECK(aclrtMemcpy(data, size, (char *)tensor->data + offset, size,
                              ACL_MEMCPY_DEVICE_TO_HOST));
    } else {
        // Download raw device bytes, then undo the upload-time transform.
        void * transform_buffer = malloc(size);
        ACL_CHECK(aclrtMemcpy(transform_buffer, size,
                              (char *)tensor->data + offset, size,
                              ACL_MEMCPY_DEVICE_TO_HOST));
        ggml_backend_cann_transform_back(tensor, transform_buffer, data);
        free(transform_buffer);
    }
}
@@ -1448,60 +1445,58 @@ ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) {
1448
1445
* @param size Size of the data to copy in bytes.
1449
1446
*/
1450
1447
// Asynchronously copy `size` bytes of host `data` into `tensor`'s device
// storage at byte `offset`, on the backend context's stream. Types flagged by
// need_transform() are converted on the host first; that path synchronizes the
// stream before freeing the temporary buffer, since the async copy reads from
// it.
// NOTE(review): on the non-transform path the host buffer must stay valid
// until the stream completes — caller's responsibility; confirm against the
// ggml-backend async-copy contract.
GGML_CALL static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
                                                         ggml_tensor * tensor,
                                                         const void * data,
                                                         size_t offset,
                                                         size_t size) {
    ggml_backend_cann_context * cann_ctx =
        (ggml_backend_cann_context *)backend->context;

    if (!need_transform(tensor->type)) {
        ACL_CHECK(aclrtMemcpyAsync((char *)tensor->data + offset, size, data,
                                   size, ACL_MEMCPY_HOST_TO_DEVICE,
                                   cann_ctx->stream()));
    } else {
        // Convert into a temporary host buffer, then upload the result.
        void * transform_buffer = malloc(size);
        ggml_backend_cann_transform(tensor, data, transform_buffer);

#ifndef NDEBUG
        // Debug builds: round-trip the transform to verify it is lossless.
        // Fix: memcmp() returns 0 on equality, so assert `== 0`; the previous
        // bare GGML_ASSERT(memcmp(...)) asserted the buffers DIFFERED, the
        // inverse of the check in ggml_backend_cann_buffer_set_tensor.
        void * check_buffer = malloc(size);
        ggml_backend_cann_transform_back(tensor, transform_buffer,
                                         check_buffer);
        GGML_ASSERT(memcmp(data, check_buffer, size) == 0);
        free(check_buffer);
#endif
        ACL_CHECK(aclrtMemcpyAsync(
            (char *)tensor->data + offset, size, transform_buffer, size,
            ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream()));
        // Wait for the upload before freeing the source buffer it reads from.
        ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
        free(transform_buffer);
    }
}
1481
1477
1482
1478
// Asynchronously copy `size` bytes out of `tensor`'s device storage at byte
// `offset` into host memory `data`, on the backend context's stream. Only
// tensors living in a CANN buffer of this backend's device are supported.
// For types flagged by need_transform(), the stream is synchronized after the
// download so the temporary buffer can be converted back and freed safely.
GGML_CALL static void ggml_backend_cann_get_tensor_async(
    ggml_backend_t backend, const ggml_tensor * tensor, void * data,
    size_t offset, size_t size) {
    ggml_backend_cann_context * cann_ctx =
        (ggml_backend_cann_context *)backend->context;
    // A view tensor's storage belongs to the buffer of the tensor it views.
    ggml_backend_buffer_t buf =
        tensor->view_src ? tensor->view_src->buffer : tensor->buffer;

    GGML_ASSERT(buf->buft == ggml_backend_cann_buffer_type(cann_ctx->device) &&
                "unsupported buffer type");

    if (!need_transform(tensor->type)) {
        ACL_CHECK(aclrtMemcpyAsync(data, size, (char *)tensor->data + offset,
                                   size, ACL_MEMCPY_DEVICE_TO_HOST,
                                   cann_ctx->stream()));
    } else {
        // Download raw device bytes, wait for completion, then undo the
        // upload-time transform into the caller's buffer.
        void * transform_buffer = malloc(size);
        ACL_CHECK(aclrtMemcpyAsync(
            transform_buffer, size, (char *)tensor->data + offset, size,
            ACL_MEMCPY_DEVICE_TO_HOST, cann_ctx->stream()));
        ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream()));
        ggml_backend_cann_transform_back(tensor, transform_buffer, data);
        free(transform_buffer);
    }
}
0 commit comments