Skip to content

Commit 9006b32

Browse files
author
Dmitry Yashunin
committed
Unmark deleted
1 parent 8f1a044 commit 9006b32

File tree

4 files changed

+88
-69
lines changed

4 files changed

+88
-69
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,6 @@ python_bindings/tests/__pycache__/
66
*.pyd
77
hnswlib.cpython*.so
88
var/
9+
.idea/
10+
.vscode/
11+

hnswlib/hnswalg.h

Lines changed: 47 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ namespace hnswlib {
1818
public:
1919
static const tableint max_update_element_locks = 65536;
2020
HierarchicalNSW(SpaceInterface<dist_t> *s) {
21-
2221
}
2322

2423
HierarchicalNSW(SpaceInterface<dist_t> *s, const std::string &location, bool nmslib = false, size_t max_elements=0) {
@@ -29,7 +28,7 @@ namespace hnswlib {
2928
link_list_locks_(max_elements), link_list_update_locks_(max_update_element_locks), element_levels_(max_elements) {
3029
max_elements_ = max_elements;
3130

32-
has_deletions_=false;
31+
num_deleted_ = 0;
3332
data_size_ = s->get_data_size();
3433
fstdistfunc_ = s->get_dist_func();
3534
dist_func_param_ = s->get_dist_func_param();
@@ -56,8 +55,6 @@ namespace hnswlib {
5655

5756
visited_list_pool_ = new VisitedListPool(1, max_elements);
5857

59-
60-
6158
//initializations for special treatment of the first node
6259
enterpoint_node_ = -1;
6360
maxlevel_ = -1;
@@ -92,6 +89,7 @@ namespace hnswlib {
9289
size_t cur_element_count;
9390
size_t size_data_per_element_;
9491
size_t size_links_per_element_;
92+
size_t num_deleted_;
9593

9694
size_t M_;
9795
size_t maxM_;
@@ -112,20 +110,15 @@ namespace hnswlib {
112110
std::vector<std::mutex> link_list_update_locks_;
113111
tableint enterpoint_node_;
114112

115-
116113
size_t size_links_level0_;
117114
size_t offsetData_, offsetLevel0_;
118115

119-
120116
char *data_level0_memory_;
121117
char **linkLists_;
122118
std::vector<int> element_levels_;
123119

124120
size_t data_size_;
125121

126-
bool has_deletions_;
127-
128-
129122
size_t label_offset_;
130123
DISTFUNC<dist_t> fstdistfunc_;
131124
void *dist_func_param_;
@@ -547,7 +540,7 @@ namespace hnswlib {
547540
}
548541
}
549542

550-
if (has_deletions_) {
543+
if (num_deleted_) {
551544
std::priority_queue<std::pair<dist_t, tableint >> top_candidates1=searchBaseLayerST<true>(currObj, query_data,
552545
ef_);
553546
top_candidates.swap(top_candidates1);
@@ -623,8 +616,6 @@ namespace hnswlib {
623616
}
624617

625618
void loadIndex(const std::string &location, SpaceInterface<dist_t> *s, size_t max_elements_i=0) {
626-
627-
628619
std::ifstream input(location, std::ios::binary);
629620

630621
if (!input.is_open())
@@ -639,7 +630,7 @@ namespace hnswlib {
639630
readBinaryPOD(input, max_elements_);
640631
readBinaryPOD(input, cur_element_count);
641632

642-
size_t max_elements=max_elements_i;
633+
size_t max_elements = max_elements_i;
643634
if(max_elements < cur_element_count)
644635
max_elements = max_elements_;
645636
max_elements_ = max_elements;
@@ -688,26 +679,19 @@ namespace hnswlib {
688679

689680
input.seekg(pos,input.beg);
690681

691-
692682
data_level0_memory_ = (char *) malloc(max_elements * size_data_per_element_);
693683
if (data_level0_memory_ == nullptr)
694684
throw std::runtime_error("Not enough memory: loadIndex failed to allocate level0");
695685
input.read(data_level0_memory_, cur_element_count * size_data_per_element_);
696686

697-
698-
699-
700687
size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint);
701688

702-
703689
size_links_level0_ = maxM0_ * sizeof(tableint) + sizeof(linklistsizeint);
704690
std::vector<std::mutex>(max_elements).swap(link_list_locks_);
705691
std::vector<std::mutex>(max_update_element_locks).swap(link_list_update_locks_);
706692

707-
708693
visited_list_pool_ = new VisitedListPool(1, max_elements);
709694

710-
711695
linkLists_ = (char **) malloc(sizeof(void *) * max_elements);
712696
if (linkLists_ == nullptr)
713697
throw std::runtime_error("Not enough memory: loadIndex failed to allocate linklists");
@@ -731,11 +715,9 @@ namespace hnswlib {
731715
}
732716
}
733717

734-
has_deletions_=false;
735-
736718
for (size_t i = 0; i < cur_element_count; i++) {
737719
if(isMarkedDeleted(i))
738-
has_deletions_=true;
720+
num_deleted_ += 1;
739721
}
740722

741723
input.close();
@@ -765,19 +747,19 @@ namespace hnswlib {
765747
}
766748

767749
static const unsigned char DELETE_MARK = 0x01;
768-
// static const unsigned char REUSE_MARK = 0x10;
750+
// static const unsigned char REUSE_MARK = 0x10;
769751
/**
770752
* Marks the element with the given label as deleted; does NOT actually change the current graph.
771753
* @param label
772754
*/
773755
void markDelete(labeltype label)
774756
{
775-
has_deletions_=true;
776757
auto search = label_lookup_.find(label);
777758
if (search == label_lookup_.end()) {
778759
throw std::runtime_error("Label not found");
779760
}
780-
markDeletedInternal(search->second);
761+
tableint internalId = search->second;
762+
markDeletedInternal(internalId);
781763
}
782764

783765
/**
@@ -786,17 +768,49 @@ namespace hnswlib {
786768
* @param internalId
787769
*/
788770
void markDeletedInternal(tableint internalId) {
789-
unsigned char *ll_cur = ((unsigned char *)get_linklist0(internalId))+2;
790-
*ll_cur |= DELETE_MARK;
771+
assert(internalId < cur_element_count);
772+
if (!isMarkedDeleted(internalId))
773+
{
774+
unsigned char *ll_cur = ((unsigned char *)get_linklist0(internalId))+2;
775+
*ll_cur |= DELETE_MARK;
776+
num_deleted_ += 1;
777+
}
778+
else
779+
{
780+
throw std::runtime_error("The requested to delete element is already deleted");
781+
}
782+
}
783+
784+
/**
785+
* Removes the deleted mark from the element with the given label; does NOT actually change the current graph.
786+
* @param label
787+
*/
788+
void unmarkDelete(labeltype label)
789+
{
790+
auto search = label_lookup_.find(label);
791+
if (search == label_lookup_.end()) {
792+
throw std::runtime_error("Label not found");
793+
}
794+
tableint internalId = search->second;
795+
unmarkDeletedInternal(internalId);
791796
}
792797

793798
/**
794799
* Removes the deleted mark from the node.
795800
* @param internalId
796801
*/
797802
void unmarkDeletedInternal(tableint internalId) {
798-
unsigned char *ll_cur = ((unsigned char *)get_linklist0(internalId))+2;
799-
*ll_cur &= ~DELETE_MARK;
803+
assert(internalId < cur_element_count);
804+
if (isMarkedDeleted(internalId))
805+
{
806+
unsigned char *ll_cur = ((unsigned char *)get_linklist0(internalId))+2;
807+
*ll_cur &= ~DELETE_MARK;
808+
num_deleted_ -= 1;
809+
}
810+
else
811+
{
812+
throw std::runtime_error("The requested to undelete element is not deleted");
813+
}
800814
}
801815

802816
/**
@@ -857,8 +871,8 @@ namespace hnswlib {
857871
}
858872

859873
for (auto&& neigh : sNeigh) {
860-
// if (neigh == internalId)
861-
// continue;
874+
// if (neigh == internalId)
875+
// continue;
862876

863877
std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>, CompareByFirst> candidates;
864878
size_t size = sCand.find(neigh) == sCand.end() ? sCand.size() : sCand.size() - 1; // sCand guaranteed to have size >= 1
@@ -1133,7 +1147,7 @@ namespace hnswlib {
11331147
}
11341148

11351149
std::priority_queue<std::pair<dist_t, tableint>, std::vector<std::pair<dist_t, tableint>>, CompareByFirst> top_candidates;
1136-
if (has_deletions_) {
1150+
if (num_deleted_) {
11371151
top_candidates=searchBaseLayerST<true,true>(
11381152
currObj, query_data, std::max(ef_, k));
11391153
}

0 commit comments

Comments
 (0)