|
12 | 12 | #include <cstdlib>
|
13 | 13 | #include <climits>
|
14 | 14 |
|
15 |
| -#include <memory> |
16 | 15 | #include <string>
|
17 | 16 | #include <vector>
|
18 | 17 | #include <stdexcept>
|
@@ -414,80 +413,90 @@ struct llama_mlock {
|
414 | 413 |
|
415 | 414 | // Replacement for std::vector<uint8_t> that doesn't require zero-initialization.
|
416 | 415 | struct llama_buffer {
|
417 |
| -#ifdef GGML_USE_METAL |
418 |
| - struct metal_deleter { void operator()(uint8_t* addr) const { free(addr); } }; |
419 |
| - std::unique_ptr<uint8_t, metal_deleter> addr; |
420 |
| -#else |
421 |
| - std::unique_ptr<uint8_t[]> addr; |
422 |
| -#endif |
| 416 | + uint8_t * addr = NULL; |
423 | 417 | size_t size = 0;
|
424 | 418 |
|
425 | 419 | llama_buffer() = default;
|
426 |
| - llama_buffer(const llama_buffer& rhs) { *this = rhs; } |
427 |
| - llama_buffer& operator=(const llama_buffer& rhs) { |
428 |
| - resize(rhs.size); |
429 |
| - memcpy(addr.get(), rhs.addr.get(), size); |
430 |
| - return *this; |
431 |
| - } |
432 | 420 |
|
433 | 421 | void resize(size_t len) {
|
434 |
| - addr.reset(); |
435 | 422 | #ifdef GGML_USE_METAL
|
436 |
| - size = 0; |
437 |
| - uint8_t* ptr; |
438 |
| - int result = posix_memalign((void **) &ptr, getpagesize(), len); |
| 423 | + free(addr); |
| 424 | + int result = posix_memalign((void **) &addr, getpagesize(), len); |
439 | 425 | if (result == 0) {
|
440 |
| - memset(ptr, 0, len); |
441 |
| - addr.reset(ptr); |
442 |
| - size = len; |
| 426 | + memset(addr, 0, len); |
| 427 | + } |
| 428 | + else { |
| 429 | + addr = NULL; |
| 430 | + len = 0; |
443 | 431 | }
|
444 | 432 | #else
|
445 |
| - addr.reset(new uint8_t[len]); |
| 433 | + delete[] addr; |
| 434 | + addr = new uint8_t[len]; |
| 435 | +#endif |
446 | 436 | size = len;
|
| 437 | + } |
| 438 | + |
| 439 | + ~llama_buffer() { |
| 440 | +#ifdef GGML_USE_METAL |
| 441 | + free(addr); |
| 442 | +#else |
| 443 | + delete[] addr; |
447 | 444 | #endif
|
| 445 | + addr = NULL; |
448 | 446 | }
|
| 447 | + |
| 448 | + // disable copy and move |
| 449 | + llama_buffer(const llama_buffer&) = delete; |
| 450 | + llama_buffer(llama_buffer&&) = delete; |
| 451 | + llama_buffer& operator=(const llama_buffer&) = delete; |
| 452 | + llama_buffer& operator=(llama_buffer&&) = delete; |
449 | 453 | };
|
450 | 454 |
|
451 | 455 | #ifdef GGML_USE_CUBLAS
|
452 | 456 | #include "ggml-cuda.h"
|
453 | 457 | struct llama_ctx_buffer {
|
454 |
| - struct cuda_deleter { |
455 |
| - bool is_cuda; |
456 |
| - void operator()(uint8_t* addr) const { |
457 |
| - if (addr) { |
458 |
| - if (is_cuda) { |
459 |
| - ggml_cuda_host_free(addr); |
460 |
| - } else { |
461 |
| - delete[] addr; |
462 |
| - } |
463 |
| - } |
464 |
| - } |
465 |
| - }; |
466 |
| - using Addr = std::unique_ptr<uint8_t, cuda_deleter>; |
467 |
| - Addr addr; |
| 458 | + uint8_t * addr = NULL; |
| 459 | + bool is_cuda; |
468 | 460 | size_t size = 0;
|
469 | 461 |
|
470 | 462 | llama_ctx_buffer() = default;
|
471 |
| - llama_ctx_buffer(const llama_ctx_buffer& rhs) { *this = rhs; } |
472 |
| - llama_ctx_buffer& operator=(const llama_ctx_buffer& rhs) { |
473 |
| - resize(rhs.size); |
474 |
| - memcpy(addr.get(), rhs.addr.get(), size); |
475 |
| - return *this; |
476 |
| - } |
477 | 463 |
|
478 |
| - void resize(size_t len) { |
479 |
| - addr.reset(); |
| 464 | + void resize(size_t size) { |
| 465 | + free(); |
480 | 466 |
|
481 |
| - bool is_cuda = true; |
482 |
| - auto* ptr = (uint8_t*) ggml_cuda_host_malloc(len); |
483 |
| - if (!ptr) { |
| 467 | + addr = (uint8_t *) ggml_cuda_host_malloc(size); |
| 468 | + if (addr) { |
| 469 | + is_cuda = true; |
| 470 | + } |
| 471 | + else { |
484 | 472 | // fall back to pageable memory
|
485 |
| - ptr = new uint8_t[len]; |
| 473 | + addr = new uint8_t[size]; |
486 | 474 | is_cuda = false;
|
487 | 475 | }
|
488 |
| - addr = Addr(ptr, {is_cuda}); |
489 |
| - size = len; |
| 476 | + this->size = size; |
490 | 477 | }
|
| 478 | + |
| 479 | + void free() { |
| 480 | + if (addr) { |
| 481 | + if (is_cuda) { |
| 482 | + ggml_cuda_host_free(addr); |
| 483 | + } |
| 484 | + else { |
| 485 | + delete[] addr; |
| 486 | + } |
| 487 | + } |
| 488 | + addr = NULL; |
| 489 | + } |
| 490 | + |
| 491 | + ~llama_ctx_buffer() { |
| 492 | + free(); |
| 493 | + } |
| 494 | + |
| 495 | + // disable copy and move |
| 496 | + llama_ctx_buffer(const llama_ctx_buffer&) = delete; |
| 497 | + llama_ctx_buffer(llama_ctx_buffer&&) = delete; |
| 498 | + llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete; |
| 499 | + llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete; |
491 | 500 | };
|
492 | 501 | #else
|
493 | 502 | typedef llama_buffer llama_ctx_buffer;
|
|
0 commit comments