- #ifndef CANN_ACLNN_OPS
- #define CANN_ACLNN_OPS
-
/**
- * @file acl_tensor
- * @brief This file contains related functions of ggml_tensor and acl_tensor.
- * Contains conversion from ggml_tensor to acl_tensor, broadcast and other
- * functions.
- * @author hipudding <[email protected]>
- * @author wangshuai09 <[email protected]>
- * @date July 15, 2024
- *
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* IN THE SOFTWARE.
*/

+ #ifndef CANN_ACLNN_OPS
+ #define CANN_ACLNN_OPS
+
#include <aclnnop/aclnn_abs.h>
#include <aclnnop/aclnn_neg.h>
#include <aclnnop/aclnn_exp.h>
@@ -483,8 +475,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
* operation is executed using the CANN backend for optimized performance.
*
* @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the indices of the maximum values will be stored.
- * dst->op is `GGML_OP_ARGMAX`.
+ * @param dst The destination tensor where the indices of the maximum values will
+ * be stored. dst->op is `GGML_OP_ARGMAX`.
*/
void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);

@@ -600,40 +592,8 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
aclTensor* acl_dst);

/**
- * @brief Launches an asynchronous task using the memory allocator.
- *
- * This macro submit an asynchronous task on the specified stream.
- * The task uses memory allocated by the allocator. It is guaranteed
- * that the memory will not be accessed by other tasks until this task
- * completes, due to the sequential execution order within the same stream.
- *
- * @param OP_NAME aclnn operator name.
- * @param args Additional arguments required by the task.
- *
- * @note
- * Memory from the allocator will be "freed" immediately and can be
- * reallocated to other pointers. However, it won't be accessed by any
- * other task before this asynchronous task ends, because all tasks in the
- * same stream are executed in queue order.
- */
- #define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...) \
-     do { \
-         uint64_t workspaceSize = 0; \
-         aclOpExecutor* executor; \
-         void* workspaceAddr = nullptr; \
- \
-         ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
- \
-         if (workspaceSize > 0) { \
-             ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); \
-             workspaceAddr = workspace_allocator.get(); \
-         } \
-         ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
-     } while (0)
-
-
- /**
- * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one output tensor.
+ * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
+ * output tensor.
*
* This function checks whether broadcasting is needed between `src0` and `src1`.
* If broadcasting is required, it calculates the proper shapes and creates
@@ -647,14 +607,57 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
*/
- void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclTensor ** acl_src0,
-                  aclTensor ** acl_src1, aclTensor ** acl_dst);
+ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
+                  aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
+
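As an editorial illustration (not part of the patch), a caller with a two-input destination tensor might use the declaration above roughly as follows; the local variable names are assumptions, not code from this diff:

    aclTensor* acl_src0 = nullptr;
    aclTensor* acl_src1 = nullptr;
    aclTensor* acl_dst  = nullptr;
    // Creates broadcast-compatible ACL views of dst->src[0], dst->src[1] and dst.
    bcast_shape(dst->src[0], dst->src[1], dst, &acl_src0, &acl_src1, &acl_dst);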
+ /**
+ * @brief Computes the 1D transposed convolution (deconvolution) of a ggml
+ * tensor using the CANN backend.
+ *
+ * @details This function performs a 1D transposed convolution (also known as
+ * deconvolution) operation on the input tensor. The computed result is stored
+ * in the destination tensor `dst`. The operation is optimized using the CANN
+ * backend for improved performance.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the transposed convolution result
+ * will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
+ */
+ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
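A quick editorial aside (not from the patch): the output length of a 1D transposed convolution typically follows L_out = (L_in - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + 1. Assuming stride 2, no padding, dilation 1 and a kernel of size 3, an input of length 8 yields (8 - 1) * 2 + 2 + 1 = 17 output elements.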
+ /**
+ * @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
+ * using the CANN backend.
+ *
+ * @details This function performs an element-wise ELU activation on the input
+ * tensor. The result is written to the destination tensor `dst` in-place.
+ * The ELU function is defined as:
+ *
+ * \text{ELU}(x) =
+ * \begin{cases}
+ * x, & \text{if } x > 0 \\
+ * \alpha \left( \exp(x) - 1 \right), & \text{if } x \leq 0
+ * \end{cases}
+ *
+ * where α (alpha) is a hyperparameter, typically set to 1.0.
+ * This operation is optimized using the CANN backend for high-performance
+ * inference or training.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the ELU-activated result will be stored.
+ * dst->op is expected to be `GGML_OP_ELU`.
+ */
+ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);

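A small sanity check on the formula above (editorial, not part of the header): with α = 1.0, ELU(2) = 2 since the input is positive, while ELU(-1) = exp(-1) - 1 ≈ -0.632.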
/**
- * @brief Applies a element-wise operation to two input tensors using the CANN backend.
+ * @brief Applies an element-wise operation to two input tensors using the CANN
+ * backend.
*
- * This templated function takes a binary operator and applies it to two source tensors
- * associated with the destination tensor. The function handles broadcasting as needed.
+ * This templated function takes a binary operator and applies it to two source
+ * tensors associated with the destination tensor. The function handles
+ * broadcasting as needed.
*
* @tparam binary_op A callable object (e.g., lambda or function pointer) representing
* the binary operation to be performed. It must take three arguments:
@@ -681,6 +684,38 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ACL_CHECK(aclDestroyTensor(acl_dst));
}

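Purely as an illustration of instantiating the templated binary-op path (the helper name aclnn_mul and its exact signature are assumptions, not taken from this diff):

    // Dispatch an element-wise multiply through the templated binary-op path,
    // assuming a hypothetical aclnn_mul helper that matches the expected callable.
    ggml_cann_binary_op<aclnn_mul>(ctx, dst);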
+ /**
+ * @brief Launches an asynchronous task using the memory allocator.
+ *
+ * This macro submits an asynchronous task on the specified stream.
+ * The task uses memory allocated by the allocator. It is guaranteed
+ * that the memory will not be accessed by other tasks until this task
+ * completes, due to the sequential execution order within the same stream.
+ *
+ * @param OP_NAME aclnn operator name.
+ * @param args Additional arguments required by the task.
+ *
+ * @note
+ * Memory from the allocator will be "freed" immediately and can be
+ * reallocated to other pointers. However, it won't be accessed by any
+ * other task before this asynchronous task ends, because all tasks in the
+ * same stream are executed in queue order.
+ */
+ #define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...) \
+     do { \
+         uint64_t workspaceSize = 0; \
+         aclOpExecutor* executor; \
+         void* workspaceAddr = nullptr; \
+ \
+         ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
+ \
+         if (workspaceSize > 0) { \
+             ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); \
+             workspaceAddr = workspace_allocator.get(); \
+         } \
+         ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
+     } while (0)
+
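As an editorial illustration (not part of the patch), a call such as GGML_CANN_CALL_ACLNN_OP(Abs, acl_src, acl_dst) pastes the operator name into the usual two-phase aclnn sequence, roughly:

    uint64_t workspaceSize = 0;
    aclOpExecutor* executor;
    void* workspaceAddr = nullptr;
    // Phase 1: query how much device workspace the operator needs.
    ACL_CHECK(aclnnAbsGetWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
    if (workspaceSize > 0) {
        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
        workspaceAddr = workspace_allocator.get();
    }
    // Phase 2: enqueue the kernel on the context's stream.
    ACL_CHECK(aclnnAbs(workspaceAddr, workspaceSize, executor, ctx.stream()));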
/**
* @brief Applies a unary operation to an input tensor using the CANN backend.
*
@@ -690,12 +725,13 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
* @tparam unary_op A callable with the signature:
* void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
* where the first aclTensor is the source and the second is the destination.
- *
+ * @param unary_op Function pointer implementing the unary operation.
* @param ctx The CANN backend context for managing resources and execution.
* @param dst The destination tensor. Its src[0] is treated as the input tensor.
*/
- template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
- void ggml_cann_unary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+ using unary_func_ptr = void (*)(ggml_backend_cann_context&, aclTensor*, aclTensor*);
+
+ static void ggml_cann_unary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst, unary_func_ptr unary_op) {
    ggml_tensor* src = dst->src[0];

    aclTensor* acl_src = ggml_cann_create_tensor(src);
@@ -706,6 +742,28 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
    ACL_CHECK(aclDestroyTensor(acl_dst));
}

+ #define DEFINE_ACLNN_WRAPPER(OP_NAME) \
+     static void aclnn_##OP_NAME##_wrapper(ggml_backend_cann_context& ctx, \
+                                           aclTensor* src, aclTensor* dst) { \
+         GGML_CANN_CALL_ACLNN_OP(OP_NAME, src, dst); \
+     }
+
+ DEFINE_ACLNN_WRAPPER(Abs)
+ DEFINE_ACLNN_WRAPPER(Neg)
+ DEFINE_ACLNN_WRAPPER(Gelu)
+ DEFINE_ACLNN_WRAPPER(Silu)
+ DEFINE_ACLNN_WRAPPER(Tanh)
+ DEFINE_ACLNN_WRAPPER(Relu)
+ DEFINE_ACLNN_WRAPPER(Sigmoid)
+ DEFINE_ACLNN_WRAPPER(Hardsigmoid)
+ DEFINE_ACLNN_WRAPPER(Hardswish)
+ DEFINE_ACLNN_WRAPPER(Exp)
+ DEFINE_ACLNN_WRAPPER(Sqrt)
+
+ static void aclnn_GeluV2_wrapper(ggml_backend_cann_context& ctx, aclTensor* src, aclTensor* dst) {
+     GGML_CANN_CALL_ACLNN_OP(GeluV2, src, 0, dst);
+ }
+
/**
* @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
*
@@ -725,10 +783,7 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
*/
#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \
    do { \
-         auto lambda = [](auto ctx, auto acl_src, auto acl_dst) { \
-             GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \
-         }; \
-         ggml_cann_unary_op<lambda>(ctx, dst); \
+         ggml_cann_unary_op(ctx, dst, aclnn_##OP_NAME##_wrapper); \
    } \
    while (0)
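A final editorial sketch of the macro in use (assumed call site with ctx and dst in scope, not shown in this diff):

    // Runs the Sqrt aclnn kernel on dst->src[0]; after the do/while wrapper this
    // expands to ggml_cann_unary_op(ctx, dst, aclnn_Sqrt_wrapper);
    GGML_CANN_CALL_UNARY_OP(Sqrt);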