Skip to content

Commit f82b70e

Browse files
danbevarthw
authored and committed
llama : std::move llm_bigram_bpe from work_queue (ggml-org#9062)
* llama : std::move llm_bigram_bpe from work_queue
  This commit updates the retrieval of llm_bigram_bpe objects from work_queue.top() by using std::move. The motivation for this is to avoid copying the std::string `text` member of the llm_bigram_bpe struct.
* squash! llama : std::move llm_bigram_bpe from work_queue
  Introduced a MovablePriorityQueue class to allow moving elements out of the priority queue for llm_bigram_bpe.
* squash! llama : std::move llm_bigram_bpe from work_queue
  Rename MovablePriorityQueue to lama_priority_queue.
* squash! llama : std::move llm_bigram_bpe from work_queue
  Rename lama_priority_queue -> llama_priority_queue.
1 parent c4943e9 commit f82b70e

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

src/llama-vocab.cpp

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -321,6 +321,21 @@ struct llm_tokenizer_spm {
321321

322322
// TODO: there are a lot of common parts between spm and bpe tokenizers, should be refactored and reused
323323

324+
// Priority queue that lets the caller take ownership of the top element by
// move instead of by copy.
//
// std::priority_queue only exposes the top element through top(), which
// returns a const reference, so extracting it normally costs a copy. This
// adapter uses the base class's protected container `c` and comparator `comp`
// to hand the element out by move.
template<typename T, typename Container = std::vector<T>, typename Compare = std::less<typename Container::value_type>>
class llama_priority_queue : public std::priority_queue<T, Container, Compare> {
public:
    using std::priority_queue<T, Container, Compare>::priority_queue;

    // Removes the highest-priority element and returns it by value.
    //
    // Precondition: the queue must not be empty; as with top()/pop() on an
    // empty std::priority_queue, calling this on an empty queue is undefined.
    T pop_move() {
        // Restore the heap first: pop_heap relocates the current top to the
        // back of the container while every element is still intact, so the
        // comparator never observes a moved-from value.
        std::pop_heap(this->c.begin(), this->c.end(), this->comp);

        // Only now steal the element that pop_heap parked at the back.
        T item = std::move(this->c.back());
        this->c.pop_back();
        return item;
    }

    // The base-class pop() would discard the element; it is deleted so that
    // removal always goes through pop_move().
    void pop() = delete;
};
338+
324339
struct llm_bigram_bpe {
325340
struct comparator {
326341
bool operator()(const llm_bigram_bpe & l, const llm_bigram_bpe & r) const {
@@ -329,7 +344,7 @@ struct llm_bigram_bpe {
329344
};
330345

331346
using queue_storage = std::vector<llm_bigram_bpe>;
332-
using queue = std::priority_queue<llm_bigram_bpe, queue_storage, comparator>;
347+
using queue = llama_priority_queue<llm_bigram_bpe, queue_storage, comparator>;
333348
llm_symbol::index left;
334349
llm_symbol::index right;
335350
std::string text;
@@ -520,8 +535,7 @@ struct llm_tokenizer_bpe {
520535

521536
// build token(s)
522537
while (!work_queue.empty()) {
523-
auto bigram = work_queue.top();
524-
work_queue.pop();
538+
auto bigram = work_queue.pop_move();
525539

526540
auto & left_symbol = symbols[bigram.left];
527541
auto & right_symbol = symbols[bigram.right];

0 commit comments

Comments
 (0)