Skip to content

RCBC-467: Add support for vector search #131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ext/couchbase
35 changes: 27 additions & 8 deletions ext/couchbase.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -7245,7 +7245,7 @@ cb_Backend_search_index_analyze_document(VALUE self, VALUE index_name, VALUE enc
}

static VALUE
cb_Backend_document_search(VALUE self, VALUE index_name, VALUE query, VALUE options)
cb_Backend_document_search(VALUE self, VALUE index_name, VALUE query, VALUE search_request, VALUE options)
{
const auto& cluster = cb_backend_to_cluster(self);

Expand All @@ -7268,6 +7268,25 @@ cb_Backend_document_search(VALUE self, VALUE index_name, VALUE query, VALUE opti
cb_extract_option_bool(req.explain, options, "explain");
cb_extract_option_bool(req.disable_scoring, options, "disable_scoring");
cb_extract_option_bool(req.include_locations, options, "include_locations");
cb_extract_option_bool(req.show_request, options, "show_request");

if (VALUE vector_options = rb_hash_aref(search_request, rb_id2sym(rb_intern("vector_search"))); !NIL_P(vector_options)) {
cb_check_type(vector_options, T_HASH);
if (VALUE vector_queries = rb_hash_aref(vector_options, rb_id2sym(rb_intern("vector_queries"))); !NIL_P(vector_queries)) {
cb_check_type(vector_queries, T_STRING);
req.vector_search = cb_string_new(vector_queries);
}
if (VALUE vector_query_combination = rb_hash_aref(vector_options, rb_id2sym(rb_intern("vector_query_combination")));
!NIL_P(vector_query_combination)) {
cb_check_type(vector_query_combination, T_SYMBOL);
ID type = rb_sym2id(vector_query_combination);
if (type == rb_intern("and")) {
req.vector_query_combination = couchbase::core::vector_query_combination::combination_and;
} else if (type == rb_intern("or")) {
req.vector_query_combination = couchbase::core::vector_query_combination::combination_or;
}
}
}

if (VALUE skip = rb_hash_aref(options, rb_id2sym(rb_intern("skip"))); !NIL_P(skip)) {
cb_check_type(skip, T_FIXNUM);
Expand Down Expand Up @@ -7431,17 +7450,17 @@ cb_Backend_document_search(VALUE self, VALUE index_name, VALUE query, VALUE opti
VALUE locations = rb_ary_new_capa(static_cast<long>(entry.locations.size()));
for (const auto& loc : entry.locations) {
VALUE location = rb_hash_new();
rb_hash_aset(row, rb_id2sym(rb_intern("field")), cb_str_new(loc.field));
rb_hash_aset(row, rb_id2sym(rb_intern("term")), cb_str_new(loc.term));
rb_hash_aset(row, rb_id2sym(rb_intern("pos")), ULL2NUM(loc.position));
rb_hash_aset(row, rb_id2sym(rb_intern("start_offset")), ULL2NUM(loc.start_offset));
rb_hash_aset(row, rb_id2sym(rb_intern("end_offset")), ULL2NUM(loc.end_offset));
rb_hash_aset(location, rb_id2sym(rb_intern("field")), cb_str_new(loc.field));
rb_hash_aset(location, rb_id2sym(rb_intern("term")), cb_str_new(loc.term));
rb_hash_aset(location, rb_id2sym(rb_intern("pos")), ULL2NUM(loc.position));
rb_hash_aset(location, rb_id2sym(rb_intern("start_offset")), ULL2NUM(loc.start_offset));
rb_hash_aset(location, rb_id2sym(rb_intern("end_offset")), ULL2NUM(loc.end_offset));
if (loc.array_positions) {
VALUE ap = rb_ary_new_capa(static_cast<long>(loc.array_positions->size()));
for (const auto& pos : *loc.array_positions) {
rb_ary_push(ap, ULL2NUM(pos));
}
rb_hash_aset(row, rb_id2sym(rb_intern("array_positions")), ap);
rb_hash_aset(location, rb_id2sym(rb_intern("array_positions")), ap);
}
rb_ary_push(locations, location);
}
Expand Down Expand Up @@ -9304,7 +9323,7 @@ init_backend(VALUE mCouchbase)
rb_define_method(cBackend, "document_unlock", VALUE_FUNC(cb_Backend_document_unlock), 6);
rb_define_method(cBackend, "document_increment", VALUE_FUNC(cb_Backend_document_increment), 5);
rb_define_method(cBackend, "document_decrement", VALUE_FUNC(cb_Backend_document_decrement), 5);
rb_define_method(cBackend, "document_search", VALUE_FUNC(cb_Backend_document_search), 3);
rb_define_method(cBackend, "document_search", VALUE_FUNC(cb_Backend_document_search), 4);
rb_define_method(cBackend, "document_analytics", VALUE_FUNC(cb_Backend_document_analytics), 2);
rb_define_method(cBackend, "document_view", VALUE_FUNC(cb_Backend_document_view), 5);

Expand Down
187 changes: 103 additions & 84 deletions lib/couchbase/cluster.rb
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def analytics_query(statement, options = Options::Analytics::DEFAULT)
#
# @param [String] index_name the name of the search index
# @param [SearchQuery] query the query tree
# @param [Options::Search] options the query tree
# @param [Options::Search] options the custom options for this search query
#
# @example Return first 10 results of "hop beer" query and request highlighting
# cluster.search_query("beer_index", Cluster::SearchQuery.match_phrase("hop beer"),
Expand All @@ -187,90 +187,23 @@ def analytics_query(statement, options = Options::Analytics::DEFAULT)
#
# @return [SearchResult]
def search_query(index_name, query, options = Options::Search::DEFAULT)
resp = @backend.document_search(index_name, JSON.generate(query), options.to_backend)
resp = @backend.document_search(index_name, JSON.generate(query), {}, options.to_backend)
convert_search_result(resp, options)
end

SearchResult.new do |res|
res.meta_data = SearchMetaData.new do |meta|
meta.metrics.max_score = resp[:meta_data][:metrics][:max_score]
meta.metrics.error_partition_count = resp[:meta_data][:metrics][:error_partition_count]
meta.metrics.success_partition_count = resp[:meta_data][:metrics][:success_partition_count]
meta.metrics.took = resp[:meta_data][:metrics][:took]
meta.metrics.total_rows = resp[:meta_data][:metrics][:total_rows]
meta.errors = resp[:meta_data][:errors]
end
res.rows = resp[:rows].map do |r|
SearchRow.new do |row|
row.transcoder = options.transcoder
row.index = r[:index]
row.id = r[:id]
row.score = r[:score]
row.fragments = r[:fragments]
unless r[:locations].empty?
row.locations = SearchRowLocations.new(
r[:locations].map do |loc|
SearchRowLocation.new do |location|
location.field = loc[:field]
location.term = loc[:term]
location.position = loc[:position]
location.start_offset = loc[:start_offset]
location.end_offset = loc[:end_offset]
location.array_positions = loc[:array_positions]
end
end
)
end
row.instance_variable_set(:@fields, r[:fields])
row.explanation = JSON.parse(r[:explanation]) if r[:explanation]
end
end
if resp[:facets]
res.facets = resp[:facets].each_with_object({}) do |(k, v), o|
facet = case options.facets[k]
when SearchFacet::SearchFacetTerm
SearchFacetResult::TermFacetResult.new do |f|
f.terms =
if v[:terms]
v[:terms].map do |t|
SearchFacetResult::TermFacetResult::TermFacet.new(t[:term], t[:count])
end
else
[]
end
end
when SearchFacet::SearchFacetDateRange
SearchFacetResult::DateRangeFacetResult.new do |f|
f.date_ranges =
if v[:date_ranges]
v[:date_ranges].map do |r|
SearchFacetResult::DateRangeFacetResult::DateRangeFacet.new(r[:name], r[:count], r[:start_time], r[:end_time])
end
else
[]
end
end
when SearchFacet::SearchFacetNumericRange
SearchFacetResult::NumericRangeFacetResult.new do |f|
f.numeric_ranges =
if v[:numeric_ranges]
v[:numeric_ranges].map do |r|
SearchFacetResult::NumericRangeFacetResult::NumericRangeFacet.new(r[:name], r[:count], r[:min], r[:max])
end
else
[]
end
end
else
next # ignore unknown facet result
end
facet.name = v[:name]
facet.field = v[:field]
facet.total = v[:total]
facet.missing = v[:missing]
facet.other = v[:other]
o[k] = facet
end
end
end
# Sends a search request to the Full Text Search (FTS) service and wraps the raw backend response.
#
# @api volatile
#
# @param [String] index_name the name of the search index
# @param [SearchRequest] search_request the request; its +to_backend+ result is destructured into the encoded
#   query and the encoded request, in that order
# @param [Options::Search] options the custom options for this search request
#
# @return [SearchResult]
def search(index_name, search_request, options = Options::Search::DEFAULT)
  encoded_query, encoded_request = search_request.to_backend
  # This entry point always passes show_request: false to the options encoder.
  backend_options = options.to_backend(show_request: false)
  raw_response = @backend.document_search(index_name, encoded_query, encoded_request, backend_options)
  convert_search_result(raw_response, options)
end

# @return [Management::UserManager]
Expand Down Expand Up @@ -427,6 +360,92 @@ def initialize(connection_string, *args)
@backend.open(connection_string, credentials, open_options)
end

# Builds a {SearchResult} from the raw response hash returned by the backend's +document_search+.
#
# @api private
#
# @param [Hash] resp raw response; +:meta_data+ and +:rows+ are read unconditionally, +:facets+ only when present
# @param [Options::Search] options options of the originating request; supplies the transcoder assigned to every
#   row and the facet definitions used to choose the facet result class
#
# @return [SearchResult]
def convert_search_result(resp, options)
  SearchResult.new do |res|
    res.meta_data = convert_search_meta_data(resp[:meta_data])
    res.rows = resp[:rows].map { |raw_row| convert_search_row(raw_row, options) }
    res.facets = convert_search_facets(resp[:facets], options) if resp[:facets]
  end
end

# Copies the metrics and errors of the raw +:meta_data+ hash into a {SearchMetaData}.
#
# @api private
def convert_search_meta_data(raw_meta)
  SearchMetaData.new do |meta|
    raw_metrics = raw_meta[:metrics]
    meta.metrics.max_score = raw_metrics[:max_score]
    meta.metrics.error_partition_count = raw_metrics[:error_partition_count]
    meta.metrics.success_partition_count = raw_metrics[:success_partition_count]
    meta.metrics.took = raw_metrics[:took]
    meta.metrics.total_rows = raw_metrics[:total_rows]
    meta.errors = raw_meta[:errors]
  end
end

# Converts one raw hit into a {SearchRow}.
#
# @api private
def convert_search_row(raw_row, options)
  SearchRow.new do |row|
    row.transcoder = options.transcoder
    row.index = raw_row[:index]
    row.id = raw_row[:id]
    row.score = raw_row[:score]
    row.fragments = raw_row[:fragments]
    raw_locations = raw_row[:locations]
    # A missing or empty locations list leaves row.locations unset.
    unless raw_locations.nil? || raw_locations.empty?
      row.locations = SearchRowLocations.new(raw_locations.map { |loc| convert_search_row_location(loc) })
    end
    row.instance_variable_set(:@fields, raw_row[:fields])
    row.explanation = JSON.parse(raw_row[:explanation]) if raw_row[:explanation]
  end
end

# Converts one raw location hash into a {SearchRowLocation}.
#
# @api private
def convert_search_row_location(raw_location)
  SearchRowLocation.new do |location|
    location.field = raw_location[:field]
    location.term = raw_location[:term]
    # The native extension stores the position under :pos; :position is still honoured when present.
    location.position = raw_location[:position] || raw_location[:pos]
    location.start_offset = raw_location[:start_offset]
    location.end_offset = raw_location[:end_offset]
    location.array_positions = raw_location[:array_positions]
  end
end

# Converts the raw facet results into a Hash keyed like the response, skipping facets whose definition in
# +options.facets+ is not one of the known facet classes.
#
# @api private
def convert_search_facets(raw_facets, options)
  raw_facets.each_with_object({}) do |(key, raw_facet), converted|
    facet = convert_search_facet_result(options.facets[key], raw_facet)
    next unless facet # ignore unknown facet result

    facet.name = raw_facet[:name]
    facet.field = raw_facet[:field]
    facet.total = raw_facet[:total]
    facet.missing = raw_facet[:missing]
    facet.other = raw_facet[:other]
    converted[key] = facet
  end
end

# Instantiates the facet result class matching the requested facet definition.
#
# @api private
#
# @return [Object, nil] the populated facet result, or +nil+ when the definition matches no known facet class
def convert_search_facet_result(definition, raw_facet)
  case definition
  when SearchFacet::SearchFacetTerm
    SearchFacetResult::TermFacetResult.new do |f|
      f.terms = (raw_facet[:terms] || []).map do |t|
        SearchFacetResult::TermFacetResult::TermFacet.new(t[:term], t[:count])
      end
    end
  when SearchFacet::SearchFacetDateRange
    SearchFacetResult::DateRangeFacetResult.new do |f|
      f.date_ranges = (raw_facet[:date_ranges] || []).map do |r|
        SearchFacetResult::DateRangeFacetResult::DateRangeFacet.new(r[:name], r[:count], r[:start_time], r[:end_time])
      end
    end
  when SearchFacet::SearchFacetNumericRange
    SearchFacetResult::NumericRangeFacetResult.new do |f|
      f.numeric_ranges = (raw_facet[:numeric_ranges] || []).map do |r|
        SearchFacetResult::NumericRangeFacetResult::NumericRangeFacet.new(r[:name], r[:count], r[:min], r[:max])
      end
    end
  end
end

# @api private
ClusterOptions = ::Couchbase::Options::Cluster
# @api private
Expand Down
29 changes: 26 additions & 3 deletions lib/couchbase/options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2213,7 +2213,7 @@ def to_backend(scope_name: nil, bucket_name: nil)
DEFAULT = Query.new.freeze
end

# Options for {Couchbase::Cluster#search_query}
# Options for {Couchbase::Cluster#search_query} and {Couchbase::Cluster#search}
class Search < Base
attr_accessor :limit # @return [Integer]
attr_accessor :skip # @return [Integer]
Expand Down Expand Up @@ -2321,11 +2321,11 @@ def scan_consistency=(level)
attr_reader :mutation_state

# @api private
# @return [Symbol
# @return [Symbol]
attr_reader :scan_consistency

# @api private
def to_backend(*)
def to_backend(show_request: nil)
{
timeout: Utils::Time.extract_duration(@timeout),
limit: @limit,
Expand All @@ -2341,13 +2341,36 @@ def to_backend(*)
facets: @facets&.map { |(k, v)| [k, JSON.generate(v)] },
scan_consistency: @scan_consistency,
mutation_state: @mutation_state&.to_a,
show_request: show_request,
}
end

# @api private
DEFAULT = Search.new.freeze
end

# Options holder for vector search; its only setting is +vector_query_combination+.
class VectorSearch
  # Combination mode for the vector queries (+:and+ or +:or+); +nil+ leaves it unspecified.
  #
  # @return [:and, :or, nil]
  attr_accessor :vector_query_combination

  # @param [:and, :or, nil] vector_query_combination
  #
  # @yieldparam [VectorSearch] self
  def initialize(vector_query_combination: nil)
    @vector_query_combination = vector_query_combination
    return unless block_given?

    yield(self)
  end

  # Encodes the options as the Hash handed to the backend.
  #
  # @return [Hash] a single-entry Hash with the +:vector_query_combination+ key
  def to_backend
    { vector_query_combination: @vector_query_combination }
  end

  # Shared, frozen instance with no combination set.
  DEFAULT = new.freeze
end

# Options for {Couchbase::Cluster#view_query}
class View < Base
attr_accessor :scan_consistency # @return [Symbol]
Expand Down
Loading