Skip to content

Commit 6d36f48

Browse files
committed
Handle the review comments of this pull request
1 parent 4d4ae1c commit 6d36f48

File tree

4 files changed

+10
-19
lines changed

4 files changed

+10
-19
lines changed

ggml/include/ggml-cann.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ extern "C" {
3333
* @brief Maximum number of CANN devices supported.
3434
*/
3535
#define GGML_CANN_MAX_DEVICES 16
36-
#define GGML_CANN_NAME "CANN"
3736

3837
GGML_API ggml_backend_reg_t ggml_backend_cann_reg(void);
3938

ggml/src/ggml-backend.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -588,14 +588,13 @@ struct ggml_backend_registry {
588588
#ifdef GGML_USE_RPC
589589
register_backend(ggml_backend_rpc_reg());
590590
#endif
591-
592591
#ifdef GGML_USE_AMX
593592
register_backend(ggml_backend_amx_reg());
594593
#endif
595-
596594
#ifdef GGML_USE_CANN
597595
register_backend(ggml_backend_cann_reg());
598596
#endif
597+
599598
// TODO: kompute
600599

601600
register_backend(ggml_backend_cpu_reg());

ggml/src/ggml-cann.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939

4040
#include "ggml-common.h"
4141

42+
#define GGML_CANN_NAME "CANN"
43+
4244
/**
4345
* @brief Handles CANN errors by printing an error message and aborting.
4446
*

src/llama.cpp

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010

1111
#if defined(GGML_USE_KOMPUTE)
1212
# include "ggml-kompute.h"
13-
#elif defined(GGML_USE_CANN)
14-
# include "ggml-cann.h"
1513
#endif
1614

1715
#ifndef __AMX_INT8__
@@ -3416,11 +3414,7 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(const llama_mode
34163414
}
34173415
}
34183416

3419-
#if defined(GGML_USE_CANN)
3420-
if (host_buffer) {
3421-
buft = ggml_backend_cann_host_buffer_type();
3422-
}
3423-
#elif defined(GGML_USE_CPU_HBM)
3417+
#if defined(GGML_USE_CPU_HBM)
34243418
buft = ggml_backend_cpu_hbm_buffer_type();
34253419
#endif
34263420

@@ -3442,8 +3436,6 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(const llama_
34423436

34433437
#if defined(GGML_USE_KOMPUTE)
34443438
buft = ggml_backend_kompute_buffer_type(device);
3445-
#elif defined(GGML_USE_CANN)
3446-
buft = ggml_backend_cann_buffer_type(device);
34473439
#endif
34483440

34493441
if (buft == nullptr) {
@@ -3487,14 +3479,13 @@ static size_t llama_get_device_memory(const llama_model & model, int device) {
34873479
return free;
34883480
}
34893481

3490-
#if defined(GGML_USE_CANN)
3491-
size_t total;
3492-
size_t free;
3493-
ggml_backend_cann_get_device_memory(device, &free, &total);
3494-
return free;
3495-
#else
3482+
if (model.devices.size() > 0) {
3483+
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(model.devices[0]);
3484+
LLAMA_LOG_WARN("%s: failed to get free memory of device %d of backend %s: device id is out of range.\n", __func__, device, ggml_backend_reg_name(reg));
3485+
} else {
3486+
LLAMA_LOG_WARN("%s: failed to get free memory of device.\n", __func__);
3487+
}
34963488
return 1;
3497-
#endif
34983489

34993490
GGML_UNUSED(model);
35003491
GGML_UNUSED(device);

0 commit comments

Comments
 (0)