1
- #ifndef CANN_ACLNN_OPS
2
- #define CANN_ACLNN_OPS
3
-
4
1
/* *
5
- * @file acl_tensor
6
- * @brief This file contains related functions of ggml_tensor and acl_tensor.
7
- * Contains conversion from ggml_tensor to acl_tensor, broadcast and other
8
- * functions.
9
- * @author hipudding <[email protected] >
10
- * @author wangshuai09 <[email protected] >
11
- * @date July 15, 2024
12
- *
13
2
* Copyright (c) 2023-2024 The ggml authors
14
3
*
15
4
* Permission is hereby granted, free of charge, to any person obtaining a copy
31
20
* IN THE SOFTWARE.
32
21
*/
33
22
23
+ #ifndef CANN_ACLNN_OPS
24
+ #define CANN_ACLNN_OPS
25
+
34
26
#include < aclnnop/aclnn_abs.h>
35
27
#include < aclnnop/aclnn_neg.h>
36
28
#include < aclnnop/aclnn_exp.h>
@@ -483,8 +475,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
483
475
* operation is executed using the CANN backend for optimized performance.
484
476
*
485
477
* @param ctx The CANN context used for operations.
486
- * @param dst The destination tensor where the indices of the maximum values will be stored.
487
- * dst->op is `GGML_OP_ARGMAX`.
478
+ * @param dst The destination tensor where the indices of the maximum values will
479
+ * be stored. dst->op is `GGML_OP_ARGMAX`.
488
480
*/
489
481
void ggml_cann_argmax (ggml_backend_cann_context& ctx, ggml_tensor* dst);
490
482
@@ -600,40 +592,8 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
600
592
aclTensor* acl_dst);
601
593
602
594
/* *
603
- * @brief Launches an asynchronous task using the memory allocator.
604
- *
605
- * This macro submit an asynchronous task on the specified stream.
606
- * The task uses memory allocated by the allocator. It is guaranteed
607
- * that the memory will not be accessed by other tasks until this task
608
- * completes, due to the sequential execution order within the same stream.
609
- *
610
- * @param OP_NAME aclnn operator name.
611
- * @param args Additional arguments required by the task.
612
- *
613
- * @note
614
- * Memory from the allocator will be "freed" immediately and can be
615
- * reallocated to other pointers. However, it won't be accessed by any
616
- * other task before this asynchronous task ends, because all tasks in the
617
- * same stream are executed in queue order.
618
- */
619
- #define GGML_CANN_CALL_ACLNN_OP (OP_NAME, ...) \
620
- do { \
621
- uint64_t workspaceSize = 0 ; \
622
- aclOpExecutor * executor; \
623
- void * workspaceAddr = nullptr ; \
624
- \
625
- ACL_CHECK (aclnn##OP_NAME##GetWorkspaceSize (__VA_ARGS__, &workspaceSize, &executor)); \
626
- \
627
- if (workspaceSize > 0 ) { \
628
- ggml_cann_pool_alloc workspace_allocator (ctx.pool (), workspaceSize); \
629
- workspaceAddr = workspace_allocator.get (); \
630
- } \
631
- ACL_CHECK (aclnn##OP_NAME (workspaceAddr, workspaceSize, executor, ctx.stream ())); \
632
- } while (0 )
633
-
634
-
635
- /* *
636
- * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one output tensor.
595
+ * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
596
+ * output tensor.
637
597
*
638
598
* This function checks whether broadcasting is needed between `src0` and `src1`.
639
599
* If broadcasting is required, it calculates the proper shapes and creates
@@ -647,14 +607,57 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
647
607
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
648
608
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
649
609
*/
650
- void bcast_shape (ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclTensor ** acl_src0,
651
- aclTensor ** acl_src1, aclTensor ** acl_dst);
610
+ void bcast_shape (ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
611
+ aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
612
+
613
+ /* *
614
+ * @brief Computes the 1D transposed convolution (deconvolution) of a ggml
615
+ * tensor using the CANN backend.
616
+ *
617
+ * @details This function performs a 1D transposed convolution (also known as
618
+ * deconvolution) operation on the input tensor. The computed result is stored
619
+ * in the destination tensor `dst`. The operation is optimized using the CANN
620
+ * backend for improved performance.
621
+ *
622
+ * @param ctx The CANN context used for operations.
623
+ * @param dst The destination tensor where the transposed convolution result
624
+ * will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
625
+ */
626
+ void ggml_cann_conv_transpose_1d (ggml_backend_cann_context& ctx, ggml_tensor* dst);
652
627
653
628
/* *
654
- * @brief Applies a element-wise operation to two input tensors using the CANN backend.
629
+ * @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
630
+ * using the CANN backend.
631
+ *
632
+ * @details This function performs an element-wise ELU activation on the input
633
+ * tensor.
634
+ * The result is written to the destination tensor `dst` in-place.
635
+ * The ELU function is defined as:
636
+ *
637
+ * \text{ELU}(x) =
638
+ * \begin{cases}
639
+ * x, & \text{if } x > 0 \\
640
+ * \alpha \left( \exp(x) - 1 \right), & \text{if } x \leq 0
641
+ * \end{cases}
655
642
*
656
- * This templated function takes a binary operator and applies it to two source tensors
657
- * associated with the destination tensor. The function handles broadcasting as needed.
643
+ * where α (alpha) is a hyperparameter, typically set to 1.0.
644
+ * This operation is optimized using the CANN backend for high-performance
645
+ * inference or training.
646
+ *
647
+ * @param ctx The CANN context used for operations.
648
+ * @param dst The destination tensor where the ELU-activated result will be stored.
649
+ * dst->op is expected to be `GGML_OP_ELU`.
650
+ */
651
+ void ggml_cann_elu (ggml_backend_cann_context& ctx, ggml_tensor* dst);
652
+
653
+ /* *
654
+ * @brief Applies an element-wise operation to two input tensors using the CANN
655
+ * backend.
656
+ *
657
+ * This templated function takes a binary operator and applies it to the two
658
+ * source tensors associated with the destination tensor.
659
+ *
660
+ * The function handles broadcasting as needed.
658
661
*
659
662
* @tparam binary_op A callable object (e.g., lambda or function pointer) representing
660
663
* the binary operation to be performed. It must take three arguments:
@@ -681,6 +684,38 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
681
684
ACL_CHECK (aclDestroyTensor (acl_dst));
682
685
}
683
686
687
+ /* *
688
+ * @brief Launches an asynchronous task using the memory allocator.
689
+ *
690
+ * This macro submits an asynchronous task on the specified stream.
691
+ * The task uses memory allocated by the allocator. It is guaranteed
692
+ * that the memory will not be accessed by other tasks until this task
693
+ * completes, due to the sequential execution order within the same stream.
694
+ *
695
+ * @param OP_NAME aclnn operator name.
696
+ * @param args Additional arguments required by the task.
697
+ *
698
+ * @note
699
+ * Memory from the allocator will be "freed" immediately and can be
700
+ * reallocated to other pointers. However, it won't be accessed by any
701
+ * other task before this asynchronous task ends, because all tasks in the
702
+ * same stream are executed in queue order.
703
+ */
704
+ #define GGML_CANN_CALL_ACLNN_OP (OP_NAME, ...) \
705
+ do { \
706
+ uint64_t workspaceSize = 0 ; \
707
+ aclOpExecutor * executor; \
708
+ void * workspaceAddr = nullptr ; \
709
+ \
710
+ ACL_CHECK (aclnn##OP_NAME##GetWorkspaceSize (__VA_ARGS__, &workspaceSize, &executor)); \
711
+ \
712
+ if (workspaceSize > 0 ) { \
713
+ ggml_cann_pool_alloc workspace_allocator (ctx.pool (), workspaceSize); \
714
+ workspaceAddr = workspace_allocator.get (); \
715
+ } \
716
+ ACL_CHECK (aclnn##OP_NAME (workspaceAddr, workspaceSize, executor, ctx.stream ())); \
717
+ } while (0 )
718
+
684
719
/* *
685
720
* @brief Applies a unary operation to an input tensor using the CANN backend.
686
721
*
@@ -690,7 +725,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
690
725
* @tparam unary_op A callable with the signature:
691
726
* void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
692
727
* where the first aclTensor is the source and the second is the destination.
693
- *
694
728
* @param ctx The CANN backend context for managing resources and execution.
695
729
* @param dst The destination tensor. Its src[0] is treated as the input tensor.
696
730
*/
@@ -702,10 +736,30 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
702
736
aclTensor* acl_dst = ggml_cann_create_tensor (dst);
703
737
704
738
unary_op (ctx, acl_src, acl_dst);
739
+
705
740
ACL_CHECK (aclDestroyTensor (acl_src));
706
741
ACL_CHECK (aclDestroyTensor (acl_dst));
707
742
}
708
743
744
+ /* *
745
+ * @brief Applies a unary operation to a ggml tensor using the CANN backend.
746
+ *
747
+ * @details This function performs a unary operation on the input tensor using
748
+ * a user-provided lambda or callable object `unary_op`, which accepts the CANN
749
+ * context and two ACL tensors (source and destination). Internally, this function
750
+ * creates ACL representations of the ggml tensors and invokes the unary operation.
751
+ * The result is stored in the destination tensor `dst`. This utility abstracts the
752
+ * common boilerplate of tensor conversion and cleanup when implementing unary ops.
753
+ *
754
+ * @param unary_op A callable that performs the unary operation using CANN APIs.
755
+ * @param ctx The CANN context used for operations.
756
+ * @param dst The destination tensor where the result will be stored.
757
+ * The source tensor is retrieved from `dst->src[0]`.
758
+ */
759
+ void ggml_cann_unary_op (
760
+ std::function<void (ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
761
+ ggml_backend_cann_context& ctx, ggml_tensor* dst);
762
+
709
763
/* *
710
764
* @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
711
765
*
@@ -725,11 +779,12 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
725
779
*/
726
780
#define GGML_CANN_CALL_UNARY_OP (OP_NAME ) \
727
781
do { \
728
- auto lambda = [](auto ctx, auto acl_src, auto acl_dst) { \
782
+ auto lambda = [](ggml_backend_cann_context& ctx, \
783
+ aclTensor* acl_src, \
784
+ aclTensor* acl_dst) { \
729
785
GGML_CANN_CALL_ACLNN_OP (OP_NAME, acl_src, acl_dst); \
730
786
}; \
731
- ggml_cann_unary_op<lambda>( ctx, dst); \
787
+ ggml_cann_unary_op (lambda, ctx, dst); \
732
788
} \
733
789
while (0 )
734
-
735
790
#endif // CANN_ACLNN_OPS
0 commit comments