Skip to content

Commit 7932b93

Browse files
committed
CANN: Fix memory waste in aclnn_tensor
1 parent 9bacd6b commit 7932b93

File tree

2 files changed: 11 additions (+11) and 9 deletions (-9)

ggml/src/ggml-cann/acl_tensor.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,7 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
5454
// added.
5555
int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
5656

57-
int64_t acl_storage_len = 0;
5857
if (ne == nullptr) {
59-
acl_storage_len = ggml_nbytes(tensor);
6058
for (int i = 0; i < GGML_MAX_DIMS; i++) {
6159
acl_ne[i] = tensor->ne[i];
6260
// The step size of acl is in elements.
@@ -65,14 +63,18 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
6563
} else {
6664
// With bcast
6765
for (int i = 0; i < dims; i++) {
68-
acl_storage_len += (ne[i] - 1) * nb[i];
6966
acl_ne[i] = ne[i];
7067
acl_stride[i] = nb[i] / ggml_element_size(tensor);
7168
}
7269
}
7370

74-
// Reverse ne and stride.
7571
int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
72+
int64_t acl_storage_len = ggml_element_size(tensor);
73+
for (int i = 0; i < final_dims; i++) {
74+
acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
75+
}
76+
77+
// Reverse ne and stride.
7678
std::reverse(acl_ne, acl_ne + final_dims);
7779
std::reverse(acl_stride, acl_stride + final_dims);
7880

ggml/src/ggml-cann/acl_tensor.h

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -101,14 +101,14 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
101101
tmp_stride[i] = nb[i] / type_size;
102102
}
103103

104-
std::reverse(tmp_ne, tmp_ne + dims);
105-
std::reverse(tmp_stride, tmp_stride + dims);
106-
107-
int64_t acl_storage_len = 0;
104+
int64_t acl_storage_len = 1;
108105
for (int i = 0; i < dims; i++) {
109-
acl_storage_len += (ne[i] - 1) * nb[i];
106+
acl_storage_len += (tmp_ne[i] - 1) * tmp_stride[i] * type_size;
110107
}
111108

109+
std::reverse(tmp_ne, tmp_ne + dims);
110+
std::reverse(tmp_stride, tmp_stride + dims);
111+
112112
aclTensor* acl_tensor =
113113
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
114114
format, &acl_storage_len, 1, data_ptr);

0 commit comments

Comments
 (0)