@@ -18,7 +18,6 @@ namespace hnswlib {
18
18
public:
19
19
static const tableint max_update_element_locks = 65536 ;
20
20
/**
 * Constructs an index object without performing any setup: the body is empty
 * and the space argument is not read.
 * @param s space interface (unused by this constructor)
 */
HierarchicalNSW(SpaceInterface<dist_t> *s) {}
23
22
24
23
HierarchicalNSW (SpaceInterface<dist_t > *s, const std::string &location, bool nmslib = false , size_t max_elements=0 ) {
@@ -29,7 +28,7 @@ namespace hnswlib {
29
28
link_list_locks_ (max_elements), link_list_update_locks_(max_update_element_locks), element_levels_(max_elements) {
30
29
max_elements_ = max_elements;
31
30
32
- has_deletions_= false ;
31
+ num_deleted_ = 0 ;
33
32
data_size_ = s->get_data_size ();
34
33
fstdistfunc_ = s->get_dist_func ();
35
34
dist_func_param_ = s->get_dist_func_param ();
@@ -56,8 +55,6 @@ namespace hnswlib {
56
55
57
56
visited_list_pool_ = new VisitedListPool (1 , max_elements);
58
57
59
-
60
-
61
58
// initializations for special treatment of the first node
62
59
enterpoint_node_ = -1 ;
63
60
maxlevel_ = -1 ;
@@ -92,6 +89,7 @@ namespace hnswlib {
92
89
size_t cur_element_count;
93
90
size_t size_data_per_element_;
94
91
size_t size_links_per_element_;
92
+ size_t num_deleted_;
95
93
96
94
size_t M_;
97
95
size_t maxM_;
@@ -112,20 +110,15 @@ namespace hnswlib {
112
110
std::vector<std::mutex> link_list_update_locks_;
113
111
tableint enterpoint_node_;
114
112
115
-
116
113
size_t size_links_level0_;
117
114
size_t offsetData_, offsetLevel0_;
118
115
119
-
120
116
char *data_level0_memory_;
121
117
char **linkLists_;
122
118
std::vector<int > element_levels_;
123
119
124
120
size_t data_size_;
125
121
126
- bool has_deletions_;
127
-
128
-
129
122
size_t label_offset_;
130
123
DISTFUNC<dist_t > fstdistfunc_;
131
124
void *dist_func_param_;
@@ -547,7 +540,7 @@ namespace hnswlib {
547
540
}
548
541
}
549
542
550
- if (has_deletions_ ) {
543
+ if (num_deleted_ ) {
551
544
std::priority_queue<std::pair<dist_t , tableint >> top_candidates1=searchBaseLayerST<true >(currObj, query_data,
552
545
ef_);
553
546
top_candidates.swap (top_candidates1);
@@ -623,8 +616,6 @@ namespace hnswlib {
623
616
}
624
617
625
618
void loadIndex (const std::string &location, SpaceInterface<dist_t > *s, size_t max_elements_i=0 ) {
626
-
627
-
628
619
std::ifstream input (location, std::ios::binary);
629
620
630
621
if (!input.is_open ())
@@ -639,7 +630,7 @@ namespace hnswlib {
639
630
readBinaryPOD (input, max_elements_);
640
631
readBinaryPOD (input, cur_element_count);
641
632
642
- size_t max_elements= max_elements_i;
633
+ size_t max_elements = max_elements_i;
643
634
if (max_elements < cur_element_count)
644
635
max_elements = max_elements_;
645
636
max_elements_ = max_elements;
@@ -688,26 +679,19 @@ namespace hnswlib {
688
679
689
680
input.seekg (pos,input.beg );
690
681
691
-
692
682
data_level0_memory_ = (char *) malloc (max_elements * size_data_per_element_);
693
683
if (data_level0_memory_ == nullptr )
694
684
throw std::runtime_error (" Not enough memory: loadIndex failed to allocate level0" );
695
685
input.read (data_level0_memory_, cur_element_count * size_data_per_element_);
696
686
697
-
698
-
699
-
700
687
size_links_per_element_ = maxM_ * sizeof (tableint) + sizeof (linklistsizeint);
701
688
702
-
703
689
size_links_level0_ = maxM0_ * sizeof (tableint) + sizeof (linklistsizeint);
704
690
std::vector<std::mutex>(max_elements).swap (link_list_locks_);
705
691
std::vector<std::mutex>(max_update_element_locks).swap (link_list_update_locks_);
706
692
707
-
708
693
visited_list_pool_ = new VisitedListPool (1 , max_elements);
709
694
710
-
711
695
linkLists_ = (char **) malloc (sizeof (void *) * max_elements);
712
696
if (linkLists_ == nullptr )
713
697
throw std::runtime_error (" Not enough memory: loadIndex failed to allocate linklists" );
@@ -731,11 +715,9 @@ namespace hnswlib {
731
715
}
732
716
}
733
717
734
- has_deletions_=false ;
735
-
736
718
for (size_t i = 0 ; i < cur_element_count; i++) {
737
719
if (isMarkedDeleted (i))
738
- has_deletions_= true ;
720
+ num_deleted_ += 1 ;
739
721
}
740
722
741
723
input.close ();
@@ -765,19 +747,19 @@ namespace hnswlib {
765
747
}
766
748
767
749
static const unsigned char DELETE_MARK = 0x01 ;
768
- // static const unsigned char REUSE_MARK = 0x10;
750
+ // static const unsigned char REUSE_MARK = 0x10;
769
751
/* *
770
752
* Marks an element with the given label deleted, does NOT really change the current graph.
771
753
* @param label
772
754
*/
773
755
void markDelete (labeltype label)
774
756
{
775
- has_deletions_=true ;
776
757
auto search = label_lookup_.find (label);
777
758
if (search == label_lookup_.end ()) {
778
759
throw std::runtime_error (" Label not found" );
779
760
}
780
- markDeletedInternal (search->second );
761
+ tableint internalId = search->second ;
762
+ markDeletedInternal (internalId);
781
763
}
782
764
783
765
/* *
@@ -786,17 +768,49 @@ namespace hnswlib {
786
768
* @param internalId
787
769
*/
788
770
void markDeletedInternal(tableint internalId) {
    assert(internalId < cur_element_count);

    // Marking an element twice is reported as an error so that the
    // deleted-element counter stays in step with the stored marks.
    if (isMarkedDeleted(internalId)) {
        throw std::runtime_error(" The requested to delete element is already deleted");
    }

    // The mark is kept in the byte located 2 bytes past the pointer
    // returned by get_linklist0 for this element.
    unsigned char *mark_byte = ((unsigned char *) get_linklist0(internalId)) + 2;
    *mark_byte |= DELETE_MARK;
    ++num_deleted_;
}
783
+
784
+ /* *
785
+ * Remove the deleted mark of the node, does NOT really change the current graph.
786
+ * @param label
787
+ */
788
+ void unmarkDelete (labeltype label)
789
+ {
790
+ auto search = label_lookup_.find (label);
791
+ if (search == label_lookup_.end ()) {
792
+ throw std::runtime_error (" Label not found" );
793
+ }
794
+ tableint internalId = search->second ;
795
+ unmarkDeletedInternal (internalId);
791
796
}
792
797
793
798
/* *
794
799
* Remove the deleted mark of the node.
795
800
* @param internalId
796
801
*/
797
802
void unmarkDeletedInternal (tableint internalId) {
798
- unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId))+2 ;
799
- *ll_cur &= ~DELETE_MARK;
803
+ assert (internalId < cur_element_count);
804
+ if (isMarkedDeleted (internalId))
805
+ {
806
+ unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId))+2 ;
807
+ *ll_cur &= ~DELETE_MARK;
808
+ num_deleted_ -= 1 ;
809
+ }
810
+ else
811
+ {
812
+ throw std::runtime_error (" The requested to undelete element is not deleted" );
813
+ }
800
814
}
801
815
802
816
/* *
@@ -857,8 +871,8 @@ namespace hnswlib {
857
871
}
858
872
859
873
for (auto && neigh : sNeigh ) {
860
- // if (neigh == internalId)
861
- // continue;
874
+ // if (neigh == internalId)
875
+ // continue;
862
876
863
877
std::priority_queue<std::pair<dist_t , tableint>, std::vector<std::pair<dist_t , tableint>>, CompareByFirst> candidates;
864
878
size_t size = sCand .find (neigh) == sCand .end () ? sCand .size () : sCand .size () - 1 ; // sCand guaranteed to have size >= 1
@@ -1133,7 +1147,7 @@ namespace hnswlib {
1133
1147
}
1134
1148
1135
1149
std::priority_queue<std::pair<dist_t , tableint>, std::vector<std::pair<dist_t , tableint>>, CompareByFirst> top_candidates;
1136
- if (has_deletions_ ) {
1150
+ if (num_deleted_ ) {
1137
1151
top_candidates=searchBaseLayerST<true ,true >(
1138
1152
currObj, query_data, std::max (ef_, k));
1139
1153
}
0 commit comments