Skip to content

Commit 0016c0b

Browse files
committed
add op acc
1 parent 2deb900 commit 0016c0b

File tree

5 files changed

+64
-10
lines changed

5 files changed

+64
-10
lines changed

ggml-cann.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,8 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
347347
ggml_cann_add(ctx, dst);
348348
break;
349349
case GGML_OP_ACC:
350-
return false;
350+
ggml_cann_acc(ctx, dst);
351+
break;
351352
case GGML_OP_MUL:
352353
ggml_cann_mul_div<aclnnMulGetWorkspaceSize, aclnnMul>(ctx, dst);
353354
break;
@@ -686,7 +687,7 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
686687
case GGML_OP_ARGSORT:
687688
return true;
688689
case GGML_OP_ACC:
689-
return false;
690+
return true;
690691
case GGML_OP_GROUP_NORM:
691692
return true;
692693
case GGML_OP_UPSCALE:

ggml-cann/acl_tensor.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ aclDataType type_mapping(ggml_type type) {
3434
*/
3535
aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* bcast_ne,
3636
size_t* bcast_nb, int64_t bcast_dims,
37-
aclFormat format) {
37+
aclFormat format, size_t offset) {
3838
size_t size = ggml_nbytes(tensor);
3939
void* deviceAddr = nullptr;
4040

@@ -55,23 +55,23 @@ aclTensor* create_acl_tensor(const ggml_tensor* tensor, int64_t* bcast_ne,
5555
for (int i = 0; i < GGML_MAX_DIMS; i++) {
5656
acl_ne[i] = tensor->ne[i];
5757
// The step size of acl is in elements.
58-
acl_stride[i] = tensor->nb[i] / ggml_type_size(tensor->type);
58+
acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
5959
}
6060
} else {
6161
// With bcast
6262
for (int i = 0; i < bcast_dims; i++) {
6363
acl_ne[i] = bcast_ne[i];
64-
acl_stride[i] = bcast_nb[i] / ggml_type_size(tensor->type);
64+
acl_stride[i] = bcast_nb[i] / ggml_element_size(tensor);
6565
}
6666
}
6767

6868
int64_t dims = (bcast_dims == 0 ? GGML_MAX_DIMS : bcast_dims);
6969
std::reverse(acl_ne, acl_ne + dims);
7070
std::reverse(acl_stride, acl_stride + dims);
7171

72-
aclTensor* acl_tensor =
73-
aclCreateTensor(acl_ne, dims, type_mapping(tensor->type), acl_stride, 0,
74-
format, acl_ne, dims, deviceAddr);
72+
aclTensor* acl_tensor = aclCreateTensor(
73+
acl_ne, dims, type_mapping(tensor->type), acl_stride,
74+
offset / ggml_element_size(tensor), format, acl_ne, dims, deviceAddr);
7575

7676
return acl_tensor;
7777
}

ggml-cann/acl_tensor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ aclDataType type_mapping(ggml_type type);
1111
aclTensor* create_acl_tensor(const ggml_tensor* tensor,
1212
int64_t* bcast_ne = nullptr,
1313
size_t* bcast_nb = nullptr, int64_t bcast_dims = 0,
14-
aclFormat format = ACL_FORMAT_ND);
14+
aclFormat format = ACL_FORMAT_ND, size_t offset = 0);
1515

1616
aclTensor* create_acl_tensor(void* data_ptr, aclDataType dtype,
1717
size_t type_size, int64_t* ne, size_t* nb,

ggml-cann/aclnn_ops.cpp

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,4 +424,55 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
424424
ACL_CHECK(aclDestroyTensor(acl_dst));
425425
}
426426

427-
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {}
427+
// Implements GGML_OP_ACC for the CANN backend: adds src1 into a strided,
// byte-offset view of dst. When not in-place, dst is first seeded with a
// copy of src0 so the regions outside the accumulated view keep src0's data.
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
428+
ggml_tensor* src0 = dst->src[0];
429+
ggml_tensor* src1 = dst->src[1];
430+
431+
// Unpack GGML_OP_ACC parameters: nb1..nb3 are byte strides for the dst
// view, offset is a byte offset into dst, and inplace selects whether
// src0 has already been written into dst by the caller.
size_t nb1 = ((int32_t*)dst->op_params)[0];
432+
size_t nb2 = ((int32_t*)dst->op_params)[1];
433+
size_t nb3 = ((int32_t*)dst->op_params)[2];
434+
size_t offset = ((int32_t*)dst->op_params)[3];
435+
bool inplace = (bool)((int32_t*)dst->op_params)[4];
436+
437+
// Byte strides of the accumulated view: innermost dimension is contiguous
// (one element per step), outer dimensions use the caller-provided strides.
size_t param_nb[] = {ggml_element_size(src0), nb1, nb2, nb3};
438+
439+
// View of dst shaped like src1, using param_nb strides and the byte
// offset. NOTE(review): offset is in bytes here — assumes create_acl_tensor
// converts it to ACL's element-based offset internally; verify.
aclTensor* acl_dst = create_acl_tensor(
440+
dst, src1->ne, param_nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
441+
aclTensor* acl_src1 = create_acl_tensor(src1);
442+
443+
// aclnnAdd computes dst = src + alpha * other; alpha = 1.0 gives plain add.
aclScalar* alpha = nullptr;
444+
float alphaValue = 1.0f;
445+
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
446+
447+
uint64_t workspaceSize = 0;
448+
aclOpExecutor* executor;
449+
void* workspaceAddr = nullptr;
450+
451+
aclrtStream stream = ctx.stream();
452+
453+
if (!inplace) {
454+
// Seed dst with src0 so elements outside the accumulated region are
// correct. Uses ggml_nbytes(dst) for both sizes — assumes src0 and dst
// have the same total byte size (true for ACC, where they share a shape).
size_t cpy_size = ggml_nbytes(dst);
455+
// Async copy on the same stream as the add below, so ordering is
// preserved without an explicit synchronize.
ACL_CHECK(aclrtMemcpyAsync(dst->data, cpy_size, src0->data, cpy_size,
456+
ACL_MEMCPY_DEVICE_TO_DEVICE, stream));
457+
// View of src0 covering the same region as acl_dst (src1's extents,
// src0's native strides, same byte offset): dst_view = src0_view + src1.
aclTensor* acl_src0 = create_acl_tensor(
458+
src0, src1->ne, src0->nb, GGML_MAX_DIMS, ACL_FORMAT_ND, offset);
459+
ACL_CHECK(aclnnAddGetWorkspaceSize(acl_src0, acl_src1, alpha, acl_dst,
460+
&workspaceSize, &executor));
461+
if (workspaceSize > 0) {
462+
// Scratch buffer owned by the backend context (freed by its pool).
workspaceAddr = ctx.alloc_buffer(workspaceSize);
463+
}
464+
ACL_CHECK(aclnnAdd(workspaceAddr, workspaceSize, executor, stream));
465+
ACL_CHECK(aclDestroyTensor(acl_src0));
466+
} else {
467+
// In-place: dst already holds src0's data, so accumulate src1 directly
// into the dst view.
ACL_CHECK(aclnnInplaceAddGetWorkspaceSize(acl_dst, acl_src1, alpha,
468+
&workspaceSize, &executor));
469+
if (workspaceSize > 0) {
470+
workspaceAddr = ctx.alloc_buffer(workspaceSize);
471+
}
472+
ACL_CHECK(
473+
aclnnInplaceAdd(workspaceAddr, workspaceSize, executor, stream));
474+
}
475+
476+
ACL_CHECK(aclDestroyTensor(acl_src1));
477+
ACL_CHECK(aclDestroyTensor(acl_dst));
478+
}

ggml-cann/aclnn_ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
4343

4444
void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
4545

46+
void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
47+
4648
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
4749
aclTensor*, uint64_t*, aclOpExecutor**),
4850
aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>

0 commit comments

Comments
 (0)