@@ -38,10 +38,9 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
38
38
// Locks to prevent race condition during update/insert of an element at same time.
39
39
// Note: Locks for additions can also be used to prevent this race condition
40
40
// if the querying of KNN is not exposed along with update/inserts, i.e. multithreaded insert/update/query in parallel.
41
- std::vector<std::mutex> link_list_update_locks_;
41
+ mutable std::vector<std::mutex> link_list_update_locks_;
42
42
43
43
std::mutex global;
44
- std::mutex cur_element_count_guard_;
45
44
std::vector<std::mutex> link_list_locks_;
46
45
47
46
tableint enterpoint_node_{0 };
@@ -57,7 +56,8 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
57
56
58
57
DISTFUNC<dist_t > fstdistfunc_;
59
58
void *dist_func_param_{nullptr };
60
- std::mutex label_lookup_lock;
59
+
60
+ mutable std::mutex label_lookup_lock; // lock for label_lookup_
61
61
std::unordered_map<labeltype, tableint> label_lookup_;
62
62
63
63
std::default_random_engine level_generator_;
@@ -68,7 +68,7 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
68
68
69
69
bool replace_deleted_ = false ;
70
70
71
- std::mutex deleted_elements_lock;
71
+ std::mutex deleted_elements_lock; // lock for deleted_elements
72
72
std::unordered_set<tableint> deleted_elements;
73
73
74
74
@@ -714,14 +714,16 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
714
714
715
715
template <typename data_t >
716
716
std::vector<data_t > getDataByLabel (labeltype label) const {
717
- tableint label_internal ;
717
+ std::unique_lock <std::mutex> lock_table (label_lookup_lock) ;
718
718
auto search = label_lookup_.find (label);
719
719
if (search == label_lookup_.end () || isMarkedDeleted (search->second )) {
720
720
throw std::runtime_error (" Label not found" );
721
721
}
722
- label_internal = search->second ;
723
-
724
- char * data_ptrv = getDataByInternalId (label_internal);
722
+ tableint internalId = search->second ;
723
+ lock_table.unlock ();
724
+ // wait for element addition or update
725
+ std::unique_lock <std::mutex> lock_el_update (link_list_update_locks_[(internalId & (max_update_element_locks - 1 ))]);
726
+ char * data_ptrv = getDataByInternalId (internalId);
725
727
size_t dim = *((size_t *) dist_func_param_);
726
728
std::vector<data_t > data;
727
729
data_t * data_ptr = (data_t *) data_ptrv;
@@ -737,11 +739,15 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
737
739
* Marks an element with the given label deleted, does NOT really change the current graph.
738
740
*/
739
741
void markDelete (labeltype label) {
742
+ std::unique_lock <std::mutex> lock_table (label_lookup_lock);
740
743
auto search = label_lookup_.find (label);
741
744
if (search == label_lookup_.end ()) {
742
745
throw std::runtime_error (" Label not found" );
743
746
}
744
747
tableint internalId = search->second ;
748
+ lock_table.unlock ();
749
+ // wait for element addition or update
750
+ std::unique_lock <std::mutex> lock_el_update (link_list_update_locks_[(internalId & (max_update_element_locks - 1 ))]);
745
751
markDeletedInternal (internalId);
746
752
}
747
753
@@ -756,7 +762,10 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
756
762
unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId))+2 ;
757
763
*ll_cur |= DELETE_MARK;
758
764
num_deleted_ += 1 ;
759
- if (replace_deleted_) deleted_elements.insert (internalId);
765
+ if (replace_deleted_) {
766
+ std::unique_lock <std::mutex> lock_deleted_elements (deleted_elements_lock);
767
+ deleted_elements.insert (internalId);
768
+ }
760
769
} else {
761
770
throw std::runtime_error (" The requested to delete element is already deleted" );
762
771
}
@@ -767,25 +776,36 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
767
776
* Remove the deleted mark of the node, does NOT really change the current graph.
768
777
*/
769
778
void unmarkDelete (labeltype label) {
779
+ std::unique_lock <std::mutex> lock_table (label_lookup_lock);
770
780
auto search = label_lookup_.find (label);
771
781
if (search == label_lookup_.end ()) {
772
782
throw std::runtime_error (" Label not found" );
773
783
}
774
784
tableint internalId = search->second ;
785
+ lock_table.unlock ();
786
+ // wait for element addition or update
787
+ std::unique_lock <std::mutex> lock_el_update (link_list_update_locks_[(internalId & (max_update_element_locks - 1 ))]);
775
788
unmarkDeletedInternal (internalId);
776
789
}
777
790
778
791
792
+
779
793
/* *
780
- * Remove the deleted mark of the node.
781
- */
794
+ * Remove the deleted mark of the node.
795
+ *
796
+ * Note: the method is not safe to use when replacement of deleted elements is enabled
797
+ * because elements marked as deleted can be completely removed from the index
798
+ */
782
799
void unmarkDeletedInternal (tableint internalId) {
783
800
assert (internalId < cur_element_count);
784
801
if (isMarkedDeleted (internalId)) {
785
802
unsigned char *ll_cur = ((unsigned char *)get_linklist0 (internalId)) + 2 ;
786
803
*ll_cur &= ~DELETE_MARK;
787
804
num_deleted_ -= 1 ;
788
- if (replace_deleted_) deleted_elements.erase (internalId);
805
+ if (replace_deleted_) {
806
+ std::unique_lock <std::mutex> lock_deleted_elements (deleted_elements_lock);
807
+ deleted_elements.erase (internalId);
808
+ }
789
809
} else {
790
810
throw std::runtime_error (" The requested to undelete element is not deleted" );
791
811
}
@@ -813,42 +833,49 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
813
833
814
834
/* *
815
835
* Adds point and replaces previously deleted point if any, updating it with new point
816
- *
817
- * If deleted point was replaced returns its label, else returns label of added point
836
+ * If a deleted point was replaced, returns its label; otherwise returns the label of the added or updated point
837
+ *
838
+ * Note:
839
+ * Methods that can work with deleted elements (unmarkDelete and addPoint) are not safe to use
840
+ * with this method, because addPointToVacantPlace removes deleted elements from the index.
818
841
*/
819
842
labeltype addPointToVacantPlace (const void * data_point, labeltype label) {
820
843
if (!replace_deleted_) {
821
844
throw std::runtime_error (" Can't use addPointToVacantPlace when replacement of deleted elements is disabled" );
822
845
}
823
846
824
- std::unique_lock <std::mutex> tmp_del_el_lock (deleted_elements_lock);
825
- bool is_empty = deleted_elements. empty () ;
826
- tmp_del_el_lock. unlock ( );
827
-
828
- if (is_empty ) {
829
- addPoint (data_point, label );
830
- return label ;
847
+ // check if there is vacant place
848
+ tableint internal_id_replaced ;
849
+ std::unique_lock <std::mutex> lock_deleted_elements (deleted_elements_lock );
850
+ bool is_vacant_place = !deleted_elements. empty ();
851
+ if (is_vacant_place ) {
852
+ internal_id_replaced = *deleted_elements. begin ( );
853
+ deleted_elements. erase (internal_id_replaced) ;
831
854
}
832
- else {
833
- tmp_del_el_lock.lock ();
834
- tableint id_replace = *deleted_elements.begin ();
835
- deleted_elements.erase (id_replace);
836
- tmp_del_el_lock.unlock ();
837
-
838
- // use link list locks to not block calls for other elements
839
- std::unique_lock <std::mutex> lock_label_update (link_list_update_locks_[(id_replace & (max_update_element_locks - 1 ))]);
840
- labeltype label_replace = getExternalLabel (id_replace);
841
- setExternalLabel (id_replace, label);
842
- lock_label_update.unlock ();
843
-
844
- std::unique_lock <std::mutex> tmp_label_lookup_lock (label_lookup_lock);
845
- label_lookup_.erase (label_replace);
846
- label_lookup_[label] = id_replace;
847
- tmp_label_lookup_lock.unlock ();
855
+ lock_deleted_elements.unlock ();
848
856
857
+ // if there is no vacant place then add or update point
858
+ // else add point to vacant place
859
+ if (!is_vacant_place) {
849
860
addPoint (data_point, label);
850
-
851
- return label_replace;
861
+ return label;
862
+ } else {
863
+ // wait for element addition or update
864
+ std::unique_lock <std::mutex> lock_el_update (link_list_update_locks_[(internal_id_replaced & (max_update_element_locks - 1 ))]);
865
+ labeltype label_replaced = getExternalLabel (internal_id_replaced);
866
+ setExternalLabel (internal_id_replaced, label);
867
+ lock_el_update.unlock ();
868
+
869
+ std::unique_lock <std::mutex> lock_table (label_lookup_lock);
870
+ label_lookup_.erase (label_replaced);
871
+ label_lookup_[label] = internal_id_replaced;
872
+ lock_table.unlock ();
873
+
874
+ lock_el_update.lock ();
875
+ unmarkDeletedInternal (internal_id_replaced);
876
+ updatePoint (data_point, internal_id_replaced, 1.0 );
877
+
878
+ return label_replaced;
852
879
}
853
880
}
854
881
@@ -1024,11 +1051,18 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
1024
1051
{
1025
1052
// Checking if the element with the same label already exists
1026
1053
// if so, updating it *instead* of creating a new element.
1027
- std::unique_lock <std::mutex> templock_curr (cur_element_count_guard_ );
1054
+ std::unique_lock <std::mutex> lock_table (label_lookup_lock );
1028
1055
auto search = label_lookup_.find (label);
1029
1056
if (search != label_lookup_.end ()) {
1030
1057
tableint existingInternalId = search->second ;
1031
- templock_curr.unlock ();
1058
+ if (replace_deleted_) {
1059
+ // wait for element addition or update
1060
+ std::unique_lock <std::mutex> lock_el_update (link_list_update_locks_[(existingInternalId & (max_update_element_locks - 1 ))]);
1061
+ if (isMarkedDeleted (existingInternalId)) {
1062
+ throw std::runtime_error (" Can't use addPoint to update deleted elements if replacement of deleted elements is enabled." );
1063
+ }
1064
+ }
1065
+ lock_table.unlock ();
1032
1066
1033
1067
std::unique_lock <std::mutex> lock_el_update (link_list_update_locks_[(existingInternalId & (max_update_element_locks - 1 ))]);
1034
1068
0 commit comments