@@ -22,7 +22,6 @@ mod levenshtein_dfa;
22
22
mod memory_index;
23
23
pub mod metrics;
24
24
pub mod query;
25
- mod ranking;
26
25
pub mod scoring;
27
26
mod search_index_manager;
28
27
pub mod searcher;
@@ -46,10 +45,6 @@ use common::{
46
45
} ,
47
46
document:: ResolvedDocument ,
48
47
index:: IndexKeyBytes ,
49
- knobs:: {
50
- SEARCHLIGHT_CLUSTER_NAME ,
51
- USE_MULTI_SEGMENT_SEARCH_QUERY ,
52
- } ,
53
48
query:: {
54
49
search_value_to_bytes,
55
50
InternalSearch ,
@@ -112,7 +107,6 @@ use tantivy::{
112
107
} ;
113
108
pub use tantivy_query:: SearchQueryResult ;
114
109
use value:: {
115
- sorting:: TotalOrdF64 ,
116
110
values_to_bytes,
117
111
ConvexValue ,
118
112
FieldPath ,
@@ -153,7 +147,6 @@ use crate::{
153
147
aggregation:: TokenMatchAggregator ,
154
148
constants:: MAX_UNIQUE_QUERY_TERMS ,
155
149
metrics:: log_num_segments_searched_total,
156
- ranking:: Ranker ,
157
150
searcher:: {
158
151
Bm25Stats ,
159
152
PostingListQuery ,
@@ -685,154 +678,25 @@ impl TantivySearchIndexSchema {
685
678
disk_index_ts : Timestamp ,
686
679
searcher : Arc < dyn Searcher > ,
687
680
) -> anyhow:: Result < RevisionWithKeys > {
688
- if * USE_MULTI_SEGMENT_SEARCH_QUERY {
689
- let number_of_segments = searcher
690
- . number_of_segments ( search_storage. clone ( ) , disk_index. clone ( ) )
691
- . await ?;
692
- let segments = ( 0 ..number_of_segments)
693
- . map ( |i| TextStorageKeys :: SingleSegment {
694
- storage_key : disk_index. clone ( ) ,
695
- segment_ord : i as u32 ,
696
- } )
697
- . collect ( ) ;
698
- return self
699
- . search2 (
700
- runtime,
701
- compiled_query,
702
- memory_index,
703
- search_storage,
704
- segments,
705
- disk_index_ts,
706
- searcher,
707
- )
708
- . await ;
709
- }
710
- // 1. Fetch the memory index matches for each QueryTerm in the query and bound.
711
- let ( term_shortlist, term_shortlist_ids) =
712
- memory_index. bound_and_evaluate_query_terms ( & compiled_query. text_query ) ;
713
-
714
- // 2. For the shortlisted terms, get the BM25 statistics for each term in the
715
- // memory index.
716
- let memory_stats_diff =
717
- memory_index. bm25_statistics_diff ( disk_index_ts, & term_shortlist. terms ( ) ) ?;
718
-
719
- // 3. Query memory index tombstones to count overfetch_delta
720
- //
721
- // Our goal is to end up with the top MAX_CANDIDATE_REVISIONS.
722
- // Some of the ones in searchlight will be filtered out if they were edited
723
- // since disk_index_ts. Count how many that is and fetch extra!
724
- let tombstoned_matches = {
725
- let term_list_query = memory_index. build_term_list_bitset_query (
726
- & compiled_query,
727
- & term_shortlist,
728
- & term_shortlist_ids,
729
- ) ;
730
- memory_index. tombstoned_matches ( disk_index_ts, & term_list_query) ?
731
- } ;
732
- let overfetch_delta = tombstoned_matches. len ( ) ;
733
- metrics:: log_searchlight_overfetch_delta ( overfetch_delta) ;
734
- let limit = MAX_CANDIDATE_REVISIONS + overfetch_delta;
735
-
736
- // 4. Do disk query
737
- let search_results = {
738
- let timer = metrics:: searchlight_client_execute_timer ( & SEARCHLIGHT_CLUSTER_NAME ) ;
739
- let results = searcher
740
- . execute_query (
741
- search_storage,
742
- disk_index,
743
- self ,
744
- compiled_query. clone ( ) ,
745
- memory_stats_diff,
746
- term_shortlist,
747
- limit,
748
- )
749
- . await ?;
750
- metrics:: finish_searchlight_client_execute ( timer, & results) ;
751
- results
752
- } ;
753
-
754
- // 5. Do memory index query
755
- let combined_term_shortlist = search_results. combined_shortlisted_terms ;
756
- let combined_term_ids =
757
- memory_index. evaluate_shortlisted_query_terms ( & combined_term_shortlist) ;
758
- let memory_revisions = {
759
- let term_list_query = memory_index. build_term_list_bitset_query (
760
- & compiled_query,
761
- & combined_term_shortlist,
762
- & combined_term_ids,
763
- ) ;
764
- let term_weights = build_term_weights (
765
- & combined_term_shortlist,
766
- & combined_term_ids,
767
- & term_list_query,
768
- search_results. combined_statistics ,
769
- ) ?;
770
- memory_index. query (
771
- disk_index_ts,
772
- & term_list_query,
773
- & combined_term_ids,
774
- & term_weights,
775
- ) ?
776
- } ;
777
-
778
- // 6. Filter out tombstones
779
- let current_disk_revisions = search_results
780
- . results
781
- . into_iter ( )
782
- . filter ( |revision| !tombstoned_matches. contains ( & revision. revision . id ) ) ;
783
-
784
- // 7. Use Bm25 to score top retrieval results
785
- let mut revisions_with_keys: Vec < _ > = memory_revisions
786
- . into_iter ( )
787
- . chain ( current_disk_revisions)
788
- . map ( |candidate| {
789
- (
790
- (
791
- TotalOrdF64 :: from ( -f64:: from ( candidate. revision . score ) ) ,
792
- TotalOrdF64 :: from ( -f64:: from ( candidate. revision . creation_time ) ) ,
793
- Vec :: < u8 > :: from ( candidate. revision . id ) ,
794
- ) ,
795
- candidate,
796
- )
681
+ let number_of_segments = searcher
682
+ . number_of_segments ( search_storage. clone ( ) , disk_index. clone ( ) )
683
+ . await ?;
684
+ let segments = ( 0 ..number_of_segments)
685
+ . map ( |i| TextStorageKeys :: SingleSegment {
686
+ storage_key : disk_index. clone ( ) ,
687
+ segment_ord : i as u32 ,
797
688
} )
798
689
. collect ( ) ;
799
- revisions_with_keys. sort_by_key ( |( key, _) | key. clone ( ) ) ;
800
- let original_len = revisions_with_keys. len ( ) ;
801
- revisions_with_keys. truncate ( MAX_CANDIDATE_REVISIONS ) ;
802
- metrics:: log_num_discarded_revisions ( original_len - revisions_with_keys. len ( ) ) ;
803
-
804
- // 8. Rank results
805
- let ranker = Ranker :: create ( & compiled_query. text_query , & combined_term_shortlist) ;
806
- let mut ranked_revisions: Vec < _ > = revisions_with_keys
807
- . into_iter ( )
808
- . map ( |( _, candidate) | {
809
- // Search results are in decreasing score order and then tie break
810
- // with decreasing creation time (newest first).
811
- //
812
- // This isn't a true index key -- notably, the last value is not the
813
- // document ID, but we're just using the index key bytes for sorting
814
- // and paginating search results within a table.
815
- let ranking_score = ranker. score ( & candidate) ;
816
-
817
- let index_fields = vec ! [
818
- Some ( ConvexValue :: Float64 ( -f64 :: from( ranking_score) ) ) ,
819
- Some ( ConvexValue :: Float64 ( -f64 :: from(
820
- candidate. revision. creation_time,
821
- ) ) ) ,
822
- Some ( ConvexValue :: Bytes (
823
- Vec :: <u8 >:: from( candidate. revision. id)
824
- . try_into( )
825
- . expect( "Could not convert internal ID to value" ) ,
826
- ) ) ,
827
- ] ;
828
- let bytes = values_to_bytes ( & index_fields) ;
829
- let index_key_bytes = IndexKeyBytes ( bytes) ;
830
- ( CandidateRevision :: from ( candidate) , index_key_bytes)
831
- } )
832
- . collect ( ) ;
833
- ranked_revisions. sort_by_key ( |( _, key) | key. clone ( ) ) ;
834
-
835
- Ok ( ranked_revisions)
690
+ self . search2 (
691
+ runtime,
692
+ compiled_query,
693
+ memory_index,
694
+ search_storage,
695
+ segments,
696
+ disk_index_ts,
697
+ searcher,
698
+ )
699
+ . await
836
700
}
837
701
838
702
fn compile_tokens_with_typo_tolerance (
0 commit comments