Skip to content

Commit 712ffc8

Browse files
author
noemotiovon
committed
[CANN] Support ELU and CONV_TRANSPOSE_1D
Signed-off-by: noemotiovon <[email protected]>
1 parent d0d5b22 commit 712ffc8

File tree

3 files changed

+181
-67
lines changed

3 files changed

+181
-67
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
#include <aclnnop/aclnn_sub.h>
5858
#include <aclnnop/aclnn_mul.h>
5959
#include <aclnnop/aclnn_div.h>
60+
#include <aclnnop/aclnn_convolution.h>
61+
#include <aclnnop/aclnn_elu.h>
6062
#include <float.h>
6163

6264
#include <cmath>
@@ -2585,3 +2587,55 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
25852587
ACL_CHECK(aclDestroyTensor(acl_src));
25862588
ACL_CHECK(aclDestroyTensor(acl_dst));
25872589
}
2590+
2591+
/**
 * @brief Computes a 1D transposed convolution (deconvolution) using the CANN
 *        backend.
 *
 * src0 holds the convolution kernel, src1 the input signal; the result is
 * written to `dst`. Only the stride is taken from op_params; padding, output
 * padding and dilation are fixed to their defaults (0, 0, 1).
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
 */
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
    ggml_tensor * src0 = dst->src[0];  // kernel / weight tensor
    ggml_tensor * src1 = dst->src[1];  // input tensor

    // stride along the single spatial dimension
    int64_t s0 = ((const int32_t*)(dst->op_params))[0];

    // 3-dim views in NCL layout (batch, channels, length)
    aclTensor* acl_input  = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
    aclTensor* acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
    aclTensor* acl_dst    = ggml_cann_create_tensor(dst,  dst->ne,  dst->nb,  3, ACL_FORMAT_NCL);

    int64_t strideVal[]        = {s0};
    int64_t paddingVal[]       = {0};
    int64_t dilationVal[]      = {1};
    int64_t outputPaddingVal[] = {0};
    aclIntArray *stride        = aclCreateIntArray(strideVal, 1);
    aclIntArray *padding       = aclCreateIntArray(paddingVal, 1);
    aclIntArray *dilation      = aclCreateIntArray(dilationVal, 1);
    aclIntArray *outputPadding = aclCreateIntArray(outputPaddingVal, 1);

    bool transposed = true;   // request the transposed (deconvolution) path
    int64_t groups = 1;
    int8_t cubeMathType = 0;  // default cube math mode

    GGML_CANN_CALL_ACLNN_OP(Convolution, acl_input, acl_weight, nullptr, stride,
        padding, dilation, transposed, outputPadding, groups, acl_dst, cubeMathType);

    // Release every ACL handle created above: the original code leaked
    // acl_input and all four aclIntArray objects.
    ACL_CHECK(aclDestroyTensor(acl_input));
    ACL_CHECK(aclDestroyTensor(acl_weight));
    ACL_CHECK(aclDestroyTensor(acl_dst));
    ACL_CHECK(aclDestroyIntArray(stride));
    ACL_CHECK(aclDestroyIntArray(padding));
    ACL_CHECK(aclDestroyIntArray(dilation));
    ACL_CHECK(aclDestroyIntArray(outputPadding));
}
2621+
2622+
/**
 * @brief Applies the ELU activation element-wise using the CANN backend.
 *
 * Calls aclnnElu with alpha = scale = inputScale = 1.0f, i.e. the standard
 * ELU: x for x > 0, exp(x) - 1 otherwise. The result is written to `dst`;
 * the input is dst->src[0].
 *
 * @param ctx The CANN context used for operations.
 * @param dst The destination tensor. dst->op is expected to be `GGML_OP_ELU`.
 */
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
    ggml_tensor * src0 = dst->src[0];

    aclTensor* acl_input = ggml_cann_create_tensor(src0);
    aclTensor* acl_dst   = ggml_cann_create_tensor(dst);

    // aclnnElu takes three scalar factors; all are 1.0 for the plain ELU.
    float ONE = 1.0f;
    aclScalar* alpha      = aclCreateScalar(&ONE, aclDataType::ACL_FLOAT);
    aclScalar* scale      = aclCreateScalar(&ONE, aclDataType::ACL_FLOAT);
    aclScalar* inputScale = aclCreateScalar(&ONE, aclDataType::ACL_FLOAT);

    GGML_CANN_CALL_ACLNN_OP(Elu, acl_input, alpha, scale, inputScale,
                            acl_dst);

    // Release all ACL handles: the original code leaked the three scalars.
    ACL_CHECK(aclDestroyScalar(alpha));
    ACL_CHECK(aclDestroyScalar(scale));
    ACL_CHECK(aclDestroyScalar(inputScale));
    ACL_CHECK(aclDestroyTensor(acl_input));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}

ggml/src/ggml-cann/aclnn_ops.h

Lines changed: 114 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,4 @@
1-
#ifndef CANN_ACLNN_OPS
2-
#define CANN_ACLNN_OPS
3-
41
/**
5-
* @file acl_tensor
6-
* @brief This file contains related functions of ggml_tensor and acl_tensor.
7-
* Contains conversion from ggml_tensor to acl_tensor, broadcast and other
8-
* functions.
9-
* @author hipudding <[email protected]>
10-
* @author wangshuai09 <[email protected]>
11-
* @date July 15, 2024
12-
*
132
* Copyright (c) 2023-2024 The ggml authors
143
*
154
* Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -31,6 +20,9 @@
3120
* IN THE SOFTWARE.
3221
*/
3322

23+
#ifndef CANN_ACLNN_OPS
24+
#define CANN_ACLNN_OPS
25+
3426
#include <aclnnop/aclnn_abs.h>
3527
#include <aclnnop/aclnn_neg.h>
3628
#include <aclnnop/aclnn_exp.h>
@@ -483,8 +475,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
483475
* operation is executed using the CANN backend for optimized performance.
484476
*
485477
* @param ctx The CANN context used for operations.
486-
* @param dst The destination tensor where the indices of the maximum values will be stored.
487-
* dst->op is `GGML_OP_ARGMAX`.
478+
* @param dst The destination tensor where the indices of the maximum values will
479+
* be stored. dst->op is `GGML_OP_ARGMAX`.
488480
*/
489481
void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
490482

@@ -600,40 +592,8 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
600592
aclTensor* acl_dst);
601593

602594
/**
603-
* @brief Launches an asynchronous task using the memory allocator.
604-
*
605-
* This macro submits an asynchronous task on the specified stream.
606-
* The task uses memory allocated by the allocator. It is guaranteed
607-
* that the memory will not be accessed by other tasks until this task
608-
* completes, due to the sequential execution order within the same stream.
609-
*
610-
* @param OP_NAME aclnn operator name.
611-
* @param args Additional arguments required by the task.
612-
*
613-
* @note
614-
* Memory from the allocator will be "freed" immediately and can be
615-
* reallocated to other pointers. However, it won't be accessed by any
616-
* other task before this asynchronous task ends, because all tasks in the
617-
* same stream are executed in queue order.
618-
*/
619-
#define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...) \
620-
do { \
621-
uint64_t workspaceSize = 0; \
622-
aclOpExecutor * executor; \
623-
void * workspaceAddr = nullptr; \
624-
\
625-
ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
626-
\
627-
if (workspaceSize > 0) { \
628-
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); \
629-
workspaceAddr = workspace_allocator.get(); \
630-
} \
631-
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
632-
} while (0)
633-
634-
635-
/**
636-
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one output tensor.
595+
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
596+
* output tensor.
637597
*
638598
* This function checks whether broadcasting is needed between `src0` and `src1`.
639599
* If broadcasting is required, it calculates the proper shapes and creates
@@ -647,14 +607,57 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
647607
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
648608
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
649609
*/
650-
void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclTensor ** acl_src0,
651-
aclTensor ** acl_src1, aclTensor ** acl_dst);
610+
void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
611+
aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
612+
613+
/**
614+
* @brief Computes the 1D transposed convolution (deconvolution) of a ggml
615+
* tensor using the CANN backend.
616+
*
617+
* @details This function performs a 1D transposed convolution (also known as
618+
* deconvolution) operation on the input tensor. The computed result is stored
619+
* in the destination tensor `dst`. The operation is optimized using the CANN
620+
* backend for improved performance.
621+
*
622+
* @param ctx The CANN context used for operations.
623+
* @param dst The destination tensor where the transposed convolution result
624+
* will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
625+
*/
626+
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
627+
628+
/**
629+
* @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
630+
* using the CANN backend.
631+
*
632+
* @details This function performs an element-wise ELU activation on the input
633+
* tensor.
634+
* The result is written to the destination tensor `dst` in-place.
635+
* The ELU function is defined as:
636+
*
637+
* \text{ELU}(x) =
638+
* \begin{cases}
639+
* x, & \text{if } x > 0 \\
640+
* \alpha \left( \exp(x) - 1 \right), & \text{if } x \leq 0
641+
* \end{cases}
642+
*
643+
* where α (alpha) is a hyperparameter, typically set to 1.0.
644+
* This operation is optimized using the CANN backend for high-performance
645+
* inference or training.
646+
*
647+
* @param ctx The CANN context used for operations.
648+
* @param dst The destination tensor where the ELU-activated result will be stored.
649+
* dst->op is expected to be `GGML_OP_ELU`.
650+
*/
651+
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
652652

653653
/**
654-
* @brief Applies a element-wise operation to two input tensors using the CANN backend.
654+
* @brief Applies an element-wise operation to two input tensors using the CANN
655+
* backend.
655656
*
656-
* This templated function takes a binary operator and applies it to two source tensors
657-
* associated with the destination tensor. The function handles broadcasting as needed.
657+
* This templated function takes a binary operator and applies it to two source
658+
* tensors
659+
* associated with the destination tensor. The function handles broadcasting as
660+
* needed.
658661
*
659662
* @tparam binary_op A callable object (e.g., lambda or function pointer) representing
660663
* the binary operation to be performed. It must take three arguments:
@@ -681,6 +684,38 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
681684
ACL_CHECK(aclDestroyTensor(acl_dst));
682685
}
683686

687+
/**
688+
* @brief Launches an asynchronous task using the memory allocator.
689+
*
690+
* This macro submits an asynchronous task on the specified stream.
691+
* The task uses memory allocated by the allocator. It is guaranteed
692+
* that the memory will not be accessed by other tasks until this task
693+
* completes, due to the sequential execution order within the same stream.
694+
*
695+
* @param OP_NAME aclnn operator name.
696+
* @param args Additional arguments required by the task.
697+
*
698+
* @note
699+
* Memory from the allocator will be "freed" immediately and can be
700+
* reallocated to other pointers. However, it won't be accessed by any
701+
* other task before this asynchronous task ends, because all tasks in the
702+
* same stream are executed in queue order.
703+
*/
704+
#define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...) \
705+
do { \
706+
uint64_t workspaceSize = 0; \
707+
aclOpExecutor * executor; \
708+
void * workspaceAddr = nullptr; \
709+
\
710+
ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
711+
\
712+
if (workspaceSize > 0) { \
713+
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); \
714+
workspaceAddr = workspace_allocator.get(); \
715+
} \
716+
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
717+
} while (0)
718+
684719
/**
685720
* @brief Applies a unary operation to an input tensor using the CANN backend.
686721
*
@@ -690,12 +725,13 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
690725
* @tparam unary_op A callable with the signature:
691726
* void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
692727
* where the first aclTensor is the source and the second is the destination.
693-
*
728+
* @param unary_op function unary_op.
694729
* @param ctx The CANN backend context for managing resources and execution.
695730
* @param dst The destination tensor. Its src[0] is treated as the input tensor.
696731
*/
697-
template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
698-
void ggml_cann_unary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
732+
using unary_func_ptr = void (*)(ggml_backend_cann_context&, aclTensor*, aclTensor*);
733+
734+
static void ggml_cann_unary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst, unary_func_ptr unary_op) {
699735
ggml_tensor* src = dst->src[0];
700736

701737
aclTensor* acl_src = ggml_cann_create_tensor(src);
@@ -706,6 +742,28 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
706742
ACL_CHECK(aclDestroyTensor(acl_dst));
707743
}
708744

745+
#define DEFINE_ACLNN_WRAPPER(OP_NAME) \
746+
static void aclnn_##OP_NAME##_wrapper(ggml_backend_cann_context& ctx, \
747+
aclTensor* src, aclTensor* dst) { \
748+
GGML_CANN_CALL_ACLNN_OP(OP_NAME, src, dst); \
749+
}
750+
751+
DEFINE_ACLNN_WRAPPER(Abs)
752+
DEFINE_ACLNN_WRAPPER(Neg)
753+
DEFINE_ACLNN_WRAPPER(Gelu)
754+
DEFINE_ACLNN_WRAPPER(Silu)
755+
DEFINE_ACLNN_WRAPPER(Tanh)
756+
DEFINE_ACLNN_WRAPPER(Relu)
757+
DEFINE_ACLNN_WRAPPER(Sigmoid)
758+
DEFINE_ACLNN_WRAPPER(Hardsigmoid)
759+
DEFINE_ACLNN_WRAPPER(Hardswish)
760+
DEFINE_ACLNN_WRAPPER(Exp)
761+
DEFINE_ACLNN_WRAPPER(Sqrt)
762+
763+
static void aclnn_GeluV2_wrapper(ggml_backend_cann_context& ctx, aclTensor* src, aclTensor* dst) {
764+
GGML_CANN_CALL_ACLNN_OP(GeluV2, src, 0, dst);
765+
}
766+
709767
/**
710768
* @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
711769
*
@@ -725,10 +783,7 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
725783
*/
726784
#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \
727785
do { \
728-
auto lambda = [](auto ctx, auto acl_src, auto acl_dst) { \
729-
GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \
730-
}; \
731-
ggml_cann_unary_op<lambda>(ctx, dst); \
786+
ggml_cann_unary_op(ctx, dst, aclnn_##OP_NAME##_wrapper); \
732787
} \
733788
while (0)
734789

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,12 +1330,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
13301330
GGML_CANN_CALL_UNARY_OP(Silu);
13311331
break;
13321332
case GGML_UNARY_OP_GELU_QUICK: {
1333-
auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {
1334-
GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
1335-
};
1336-
ggml_cann_unary_op<lambda>(ctx, dst);
1337-
}
1333+
GGML_CANN_CALL_UNARY_OP(GeluV2);
13381334
break;
1335+
}
13391336
case GGML_UNARY_OP_TANH:
13401337
GGML_CANN_CALL_UNARY_OP(Tanh);
13411338
break;
@@ -1354,6 +1351,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
13541351
case GGML_UNARY_OP_EXP:
13551352
GGML_CANN_CALL_UNARY_OP(Exp);
13561353
break;
1354+
case GGML_UNARY_OP_ELU:
1355+
ggml_cann_elu(ctx, dst);
1356+
break;
13571357
default:
13581358
return false;
13591359
}
@@ -1444,11 +1444,14 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
14441444
ggml_cann_argmax(ctx, dst);
14451445
break;
14461446
case GGML_OP_COS:
1447-
ggml_cann_unary_op<aclnn_cos>(ctx, dst);
1447+
ggml_cann_unary_op(ctx, dst, aclnn_cos);
14481448
break;
14491449
case GGML_OP_SIN:
1450-
ggml_cann_unary_op<aclnn_sin>(ctx, dst);
1451-
break;
1450+
ggml_cann_unary_op(ctx, dst, aclnn_sin);
1451+
break;
1452+
case GGML_OP_CONV_TRANSPOSE_1D:
1453+
ggml_cann_conv_transpose_1d(ctx, dst);
1454+
break;
14521455
default:
14531456
return false;
14541457
}
@@ -1710,6 +1713,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
17101713
case GGML_UNARY_OP_GELU_QUICK:
17111714
case GGML_UNARY_OP_TANH:
17121715
case GGML_UNARY_OP_EXP:
1716+
case GGML_UNARY_OP_ELU:
17131717
return true;
17141718
default:
17151719
return false;
@@ -1842,6 +1846,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
18421846
case GGML_OP_ARGMAX:
18431847
case GGML_OP_COS:
18441848
case GGML_OP_SIN:
1849+
case GGML_OP_CONV_TRANSPOSE_1D:
18451850
return true;
18461851
default:
18471852
return false;

0 commit comments

Comments
 (0)