@@ -904,6 +904,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         return;
     }
     // TODO: simplify
+    const size_t CANN_DUP_OP_SUPPORTED_MAX_ROWS = 65535;
     if (src->type == GGML_TYPE_F16) {
         if (dst->type == GGML_TYPE_Q8_0) {
             aclrtlaunch_ascendc_quantize_f16_q8_0(
@@ -931,7 +932,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 if (src->nb[0] == src_type_size) {
                     // src0 is contigous on first dimension, copy by rows
                     int64_t rows_num = ggml_nrows(src);
-
+                    GGML_ASSERT(rows_num <= CANN_DUP_OP_SUPPORTED_MAX_ROWS);
                     aclrtlaunch_ascendc_dup_by_rows_fp16(
                         rows_num, ctx.stream(), src->data, dst->data,
                         ((ggml_tensor*)src->extra)->ne,
@@ -956,6 +957,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 if (src->nb[0] == src_type_size) {
                     // src0 is contigous on first dimension, copy by rows
                     int64_t rows_num = ggml_nrows(src);
+                    GGML_ASSERT(rows_num <= CANN_DUP_OP_SUPPORTED_MAX_ROWS);
                     aclrtlaunch_ascendc_dup_by_rows_fp16_to_fp32(
                         rows_num, ctx.stream(), src->data, dst->data,
                         ((ggml_tensor*)src->extra)->ne,
@@ -999,6 +1001,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 if (src->nb[0] == src_type_size) {
                     // src0 is contigous on first dimension, copy by rows
                     int64_t rows_num = ggml_nrows(src);
+                    GGML_ASSERT(rows_num <= CANN_DUP_OP_SUPPORTED_MAX_ROWS);
                     aclrtlaunch_ascendc_dup_by_rows_fp32(
                         rows_num, ctx.stream(), src->data, dst->data,
                         ((ggml_tensor*)src->extra)->ne,
@@ -1025,6 +1028,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 if (src->nb[0] == src_type_size) {
                     // src0 is contigous on first dimension, copy by rows
                     int64_t rows_num = ggml_nrows(src);
+                    GGML_ASSERT(rows_num <= CANN_DUP_OP_SUPPORTED_MAX_ROWS);
                     aclrtlaunch_ascendc_dup_by_rows_fp32_to_fp16(
                         rows_num, ctx.stream(), src->data, dst->data,
                         ((ggml_tensor*)src->extra)->ne,
@@ -2315,8 +2319,6 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         {
             if ((src0->ne[0] % 8) != 0) {
                 size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] * src0->ne[0] * ggml_type_size(GGML_TYPE_F32);
-                /* printf("\n\nggml_cann_get_rows: row elements:%d, src1->ne[0]:%d, src1->ne[1]:%d, src1->ne[2]%d, src0->ne[0]:%d, ggml_type_size(GGML_TYPE_F32):%d, dst_len:%d.\n", src0->ne[0],
-                    src1->ne[0], src1->ne[1], src1->ne[2], src0->ne[0], ggml_type_size(GGML_TYPE_F32), dst_len); */
                 ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
             }
             aclrtlaunch_ascendc_get_row_f32(
@@ -2332,8 +2334,6 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         {
             if ((src0->ne[0] % 16) != 0) {
                 size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] * src0->ne[0] * ggml_type_size(GGML_TYPE_F32);  // out is also f32, even input is f16
-                /* printf("\n\nggml_cann_get_rows: row elements:%d, src1->ne[0]:%d, src1->ne[1]:%d, src1->ne[2]:%d, src0->ne[0]:%d, ggml_type_size(GGML_TYPE_F32):%d, dst_len:%d.\n", src0->ne[0],
-                    src1->ne[0], src1->ne[1], src1->ne[2], src0->ne[0], ggml_type_size(GGML_TYPE_F32), dst_len); */
                 ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
             }
             aclrtlaunch_ascendc_get_row_f16(
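
The new CANN_DUP_OP_SUPPORTED_MAX_ROWS guard caps rows_num before each row-wise dup kernel launch; 65535 appears to be the largest row count a single ascendc_dup_by_rows_* launch is expected to cover, so a larger tensor would otherwise be copied only partially. A minimal sketch of the same guard pattern, using a hypothetical launch_dup_by_rows stand-in rather than the real aclrtlaunch_* API:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-in for the aclrtlaunch_ascendc_dup_by_rows_* calls;
    // only the guard pattern from the diff is illustrated, not the CANN API.
    static void launch_dup_by_rows(int64_t rows_num) {
        std::printf("launching dup kernel for %lld rows\n", (long long) rows_num);
    }

    // Mirrors CANN_DUP_OP_SUPPORTED_MAX_ROWS from the diff above.
    constexpr int64_t kMaxSupportedRows = 65535;

    static void dup_by_rows_guarded(int64_t rows_num) {
        // Fail fast when the tensor has more rows than one launch can cover,
        // rather than silently copying only part of the data.
        assert(rows_num <= kMaxSupportedRows);
        launch_dup_by_rows(rows_num);
    }

    int main() {
        dup_by_rows_guarded(4096);     // fine
        // dup_by_rows_guarded(70000); // would trip the assert
        return 0;
    }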