Skip to content

Commit e83e9f5

Browse files
committed
RCBC-467: Add support for vector search
1 parent 3423b68 commit e83e9f5

File tree

6 files changed

+384
-104
lines changed

6 files changed

+384
-104
lines changed

ext/couchbase

ext/couchbase.cxx

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7245,7 +7245,7 @@ cb_Backend_search_index_analyze_document(VALUE self, VALUE index_name, VALUE enc
72457245
}
72467246

72477247
static VALUE
7248-
cb_Backend_document_search(VALUE self, VALUE index_name, VALUE query, VALUE options)
7248+
cb_Backend_document_search(VALUE self, VALUE index_name, VALUE query, VALUE search_request, VALUE options)
72497249
{
72507250
const auto& cluster = cb_backend_to_cluster(self);
72517251

@@ -7268,6 +7268,25 @@ cb_Backend_document_search(VALUE self, VALUE index_name, VALUE query, VALUE opti
72687268
cb_extract_option_bool(req.explain, options, "explain");
72697269
cb_extract_option_bool(req.disable_scoring, options, "disable_scoring");
72707270
cb_extract_option_bool(req.include_locations, options, "include_locations");
7271+
cb_extract_option_bool(req.show_request, options, "show_request");
7272+
7273+
if (VALUE vector_options = rb_hash_aref(search_request, rb_id2sym(rb_intern("vector_search"))); !NIL_P(vector_options)) {
7274+
cb_check_type(vector_options, T_HASH);
7275+
if (VALUE vector_queries = rb_hash_aref(vector_options, rb_id2sym(rb_intern("vector_queries"))); !NIL_P(vector_queries)) {
7276+
cb_check_type(vector_queries, T_STRING);
7277+
req.vector_search = cb_string_new(vector_queries);
7278+
}
7279+
if (VALUE vector_query_combination = rb_hash_aref(vector_options, rb_id2sym(rb_intern("vector_query_combination")));
7280+
!NIL_P(vector_query_combination)) {
7281+
cb_check_type(vector_query_combination, T_SYMBOL);
7282+
ID type = rb_sym2id(vector_query_combination);
7283+
if (type == rb_intern("and")) {
7284+
req.vector_query_combination = couchbase::core::vector_query_combination::combination_and;
7285+
} else if (type == rb_intern("or")) {
7286+
req.vector_query_combination = couchbase::core::vector_query_combination::combination_or;
7287+
}
7288+
}
7289+
}
72717290

72727291
if (VALUE skip = rb_hash_aref(options, rb_id2sym(rb_intern("skip"))); !NIL_P(skip)) {
72737292
cb_check_type(skip, T_FIXNUM);
@@ -7431,17 +7450,17 @@ cb_Backend_document_search(VALUE self, VALUE index_name, VALUE query, VALUE opti
74317450
VALUE locations = rb_ary_new_capa(static_cast<long>(entry.locations.size()));
74327451
for (const auto& loc : entry.locations) {
74337452
VALUE location = rb_hash_new();
7434-
rb_hash_aset(row, rb_id2sym(rb_intern("field")), cb_str_new(loc.field));
7435-
rb_hash_aset(row, rb_id2sym(rb_intern("term")), cb_str_new(loc.term));
7436-
rb_hash_aset(row, rb_id2sym(rb_intern("pos")), ULL2NUM(loc.position));
7437-
rb_hash_aset(row, rb_id2sym(rb_intern("start_offset")), ULL2NUM(loc.start_offset));
7438-
rb_hash_aset(row, rb_id2sym(rb_intern("end_offset")), ULL2NUM(loc.end_offset));
7453+
rb_hash_aset(location, rb_id2sym(rb_intern("field")), cb_str_new(loc.field));
7454+
rb_hash_aset(location, rb_id2sym(rb_intern("term")), cb_str_new(loc.term));
7455+
rb_hash_aset(location, rb_id2sym(rb_intern("pos")), ULL2NUM(loc.position));
7456+
rb_hash_aset(location, rb_id2sym(rb_intern("start_offset")), ULL2NUM(loc.start_offset));
7457+
rb_hash_aset(location, rb_id2sym(rb_intern("end_offset")), ULL2NUM(loc.end_offset));
74397458
if (loc.array_positions) {
74407459
VALUE ap = rb_ary_new_capa(static_cast<long>(loc.array_positions->size()));
74417460
for (const auto& pos : *loc.array_positions) {
74427461
rb_ary_push(ap, ULL2NUM(pos));
74437462
}
7444-
rb_hash_aset(row, rb_id2sym(rb_intern("array_positions")), ap);
7463+
rb_hash_aset(location, rb_id2sym(rb_intern("array_positions")), ap);
74457464
}
74467465
rb_ary_push(locations, location);
74477466
}
@@ -9304,7 +9323,7 @@ init_backend(VALUE mCouchbase)
93049323
rb_define_method(cBackend, "document_unlock", VALUE_FUNC(cb_Backend_document_unlock), 6);
93059324
rb_define_method(cBackend, "document_increment", VALUE_FUNC(cb_Backend_document_increment), 5);
93069325
rb_define_method(cBackend, "document_decrement", VALUE_FUNC(cb_Backend_document_decrement), 5);
9307-
rb_define_method(cBackend, "document_search", VALUE_FUNC(cb_Backend_document_search), 3);
9326+
rb_define_method(cBackend, "document_search", VALUE_FUNC(cb_Backend_document_search), 4);
93089327
rb_define_method(cBackend, "document_analytics", VALUE_FUNC(cb_Backend_document_analytics), 2);
93099328
rb_define_method(cBackend, "document_view", VALUE_FUNC(cb_Backend_document_view), 5);
93109329

lib/couchbase/cluster.rb

Lines changed: 103 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def analytics_query(statement, options = Options::Analytics::DEFAULT)
174174
#
175175
# @param [String] index_name the name of the search index
176176
# @param [SearchQuery] query the query tree
177-
# @param [Options::Search] options the query tree
177+
# @param [Options::Search] options the custom options for this search query
178178
#
179179
# @example Return first 10 results of "hop beer" query and request highlighting
180180
# cluster.search_query("beer_index", Cluster::SearchQuery.match_phrase("hop beer"),
@@ -187,90 +187,23 @@ def analytics_query(statement, options = Options::Analytics::DEFAULT)
187187
#
188188
# @return [SearchResult]
189189
def search_query(index_name, query, options = Options::Search::DEFAULT)
190-
resp = @backend.document_search(index_name, JSON.generate(query), options.to_backend)
190+
resp = @backend.document_search(index_name, JSON.generate(query), {}, options.to_backend)
191+
convert_search_result(resp, options)
192+
end
191193

192-
SearchResult.new do |res|
193-
res.meta_data = SearchMetaData.new do |meta|
194-
meta.metrics.max_score = resp[:meta_data][:metrics][:max_score]
195-
meta.metrics.error_partition_count = resp[:meta_data][:metrics][:error_partition_count]
196-
meta.metrics.success_partition_count = resp[:meta_data][:metrics][:success_partition_count]
197-
meta.metrics.took = resp[:meta_data][:metrics][:took]
198-
meta.metrics.total_rows = resp[:meta_data][:metrics][:total_rows]
199-
meta.errors = resp[:meta_data][:errors]
200-
end
201-
res.rows = resp[:rows].map do |r|
202-
SearchRow.new do |row|
203-
row.transcoder = options.transcoder
204-
row.index = r[:index]
205-
row.id = r[:id]
206-
row.score = r[:score]
207-
row.fragments = r[:fragments]
208-
unless r[:locations].empty?
209-
row.locations = SearchRowLocations.new(
210-
r[:locations].map do |loc|
211-
SearchRowLocation.new do |location|
212-
location.field = loc[:field]
213-
location.term = loc[:term]
214-
location.position = loc[:position]
215-
location.start_offset = loc[:start_offset]
216-
location.end_offset = loc[:end_offset]
217-
location.array_positions = loc[:array_positions]
218-
end
219-
end
220-
)
221-
end
222-
row.instance_variable_set(:@fields, r[:fields])
223-
row.explanation = JSON.parse(r[:explanation]) if r[:explanation]
224-
end
225-
end
226-
if resp[:facets]
227-
res.facets = resp[:facets].each_with_object({}) do |(k, v), o|
228-
facet = case options.facets[k]
229-
when SearchFacet::SearchFacetTerm
230-
SearchFacetResult::TermFacetResult.new do |f|
231-
f.terms =
232-
if v[:terms]
233-
v[:terms].map do |t|
234-
SearchFacetResult::TermFacetResult::TermFacet.new(t[:term], t[:count])
235-
end
236-
else
237-
[]
238-
end
239-
end
240-
when SearchFacet::SearchFacetDateRange
241-
SearchFacetResult::DateRangeFacetResult.new do |f|
242-
f.date_ranges =
243-
if v[:date_ranges]
244-
v[:date_ranges].map do |r|
245-
SearchFacetResult::DateRangeFacetResult::DateRangeFacet.new(r[:name], r[:count], r[:start_time], r[:end_time])
246-
end
247-
else
248-
[]
249-
end
250-
end
251-
when SearchFacet::SearchFacetNumericRange
252-
SearchFacetResult::NumericRangeFacetResult.new do |f|
253-
f.numeric_ranges =
254-
if v[:numeric_ranges]
255-
v[:numeric_ranges].map do |r|
256-
SearchFacetResult::NumericRangeFacetResult::NumericRangeFacet.new(r[:name], r[:count], r[:min], r[:max])
257-
end
258-
else
259-
[]
260-
end
261-
end
262-
else
263-
next # ignore unknown facet result
264-
end
265-
facet.name = v[:name]
266-
facet.field = v[:field]
267-
facet.total = v[:total]
268-
facet.missing = v[:missing]
269-
facet.other = v[:other]
270-
o[k] = facet
271-
end
272-
end
273-
end
194+
# Performs a request against the Full Text Search (FTS) service.
195+
#
196+
# @api volatile
197+
#
198+
# @param [String] index_name the name of the search index
199+
# @param [SearchRequest] search_request the search request to execute
200+
# @param [Options::Search] options the custom options for this search request
201+
#
202+
# @return [SearchResult]
203+
def search(index_name, search_request, options = Options::Search::DEFAULT)
204+
encoded_query, encoded_req = search_request.to_backend
205+
resp = @backend.document_search(index_name, encoded_query, encoded_req, options.to_backend(show_request: false))
206+
convert_search_result(resp, options)
274207
end
275208

276209
# @return [Management::UserManager]
@@ -427,6 +360,92 @@ def initialize(connection_string, *args)
427360
@backend.open(connection_string, credentials, open_options)
428361
end
429362

363+
# @api private
364+
def convert_search_result(resp, options)
365+
SearchResult.new do |res|
366+
res.meta_data = SearchMetaData.new do |meta|
367+
meta.metrics.max_score = resp[:meta_data][:metrics][:max_score]
368+
meta.metrics.error_partition_count = resp[:meta_data][:metrics][:error_partition_count]
369+
meta.metrics.success_partition_count = resp[:meta_data][:metrics][:success_partition_count]
370+
meta.metrics.took = resp[:meta_data][:metrics][:took]
371+
meta.metrics.total_rows = resp[:meta_data][:metrics][:total_rows]
372+
meta.errors = resp[:meta_data][:errors]
373+
end
374+
res.rows = resp[:rows].map do |r|
375+
SearchRow.new do |row|
376+
row.transcoder = options.transcoder
377+
row.index = r[:index]
378+
row.id = r[:id]
379+
row.score = r[:score]
380+
row.fragments = r[:fragments]
381+
unless r[:locations].empty?
382+
row.locations = SearchRowLocations.new(
383+
r[:locations].map do |loc|
384+
SearchRowLocation.new do |location|
385+
location.field = loc[:field]
386+
location.term = loc[:term]
387+
location.position = loc[:position]
388+
location.start_offset = loc[:start_offset]
389+
location.end_offset = loc[:end_offset]
390+
location.array_positions = loc[:array_positions]
391+
end
392+
end
393+
)
394+
end
395+
row.instance_variable_set(:@fields, r[:fields])
396+
row.explanation = JSON.parse(r[:explanation]) if r[:explanation]
397+
end
398+
end
399+
if resp[:facets]
400+
res.facets = resp[:facets].each_with_object({}) do |(k, v), o|
401+
facet = case options.facets[k]
402+
when SearchFacet::SearchFacetTerm
403+
SearchFacetResult::TermFacetResult.new do |f|
404+
f.terms =
405+
if v[:terms]
406+
v[:terms].map do |t|
407+
SearchFacetResult::TermFacetResult::TermFacet.new(t[:term], t[:count])
408+
end
409+
else
410+
[]
411+
end
412+
end
413+
when SearchFacet::SearchFacetDateRange
414+
SearchFacetResult::DateRangeFacetResult.new do |f|
415+
f.date_ranges =
416+
if v[:date_ranges]
417+
v[:date_ranges].map do |r|
418+
SearchFacetResult::DateRangeFacetResult::DateRangeFacet.new(r[:name], r[:count], r[:start_time], r[:end_time])
419+
end
420+
else
421+
[]
422+
end
423+
end
424+
when SearchFacet::SearchFacetNumericRange
425+
SearchFacetResult::NumericRangeFacetResult.new do |f|
426+
f.numeric_ranges =
427+
if v[:numeric_ranges]
428+
v[:numeric_ranges].map do |r|
429+
SearchFacetResult::NumericRangeFacetResult::NumericRangeFacet.new(r[:name], r[:count], r[:min], r[:max])
430+
end
431+
else
432+
[]
433+
end
434+
end
435+
else
436+
next # ignore unknown facet result
437+
end
438+
facet.name = v[:name]
439+
facet.field = v[:field]
440+
facet.total = v[:total]
441+
facet.missing = v[:missing]
442+
facet.other = v[:other]
443+
o[k] = facet
444+
end
445+
end
446+
end
447+
end
448+
430449
# @api private
431450
ClusterOptions = ::Couchbase::Options::Cluster
432451
# @api private

lib/couchbase/options.rb

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2213,7 +2213,7 @@ def to_backend(scope_name: nil, bucket_name: nil)
22132213
DEFAULT = Query.new.freeze
22142214
end
22152215

2216-
# Options for {Couchbase::Cluster#search_query}
2216+
# Options for {Couchbase::Cluster#search_query} and {Couchbase::Cluster#search}
22172217
class Search < Base
22182218
attr_accessor :limit # @return [Integer]
22192219
attr_accessor :skip # @return [Integer]
@@ -2321,11 +2321,11 @@ def scan_consistency=(level)
23212321
attr_reader :mutation_state
23222322

23232323
# @api private
2324-
# @return [Symbol
2324+
# @return [Symbol]
23252325
attr_reader :scan_consistency
23262326

23272327
# @api private
2328-
def to_backend(*)
2328+
def to_backend(show_request: nil)
23292329
{
23302330
timeout: Utils::Time.extract_duration(@timeout),
23312331
limit: @limit,
@@ -2341,13 +2341,36 @@ def to_backend(*)
23412341
facets: @facets&.map { |(k, v)| [k, JSON.generate(v)] },
23422342
scan_consistency: @scan_consistency,
23432343
mutation_state: @mutation_state&.to_a,
2344+
show_request: show_request,
23442345
}
23452346
end
23462347

23472348
# @api private
23482349
DEFAULT = Search.new.freeze
23492350
end
23502351

2352+
class VectorSearch
2353+
# @return [:and, :or, nil]
2354+
attr_accessor :vector_query_combination
2355+
2356+
# @param [:and, :or, nil] vector_query_combination
2357+
#
2358+
# @yieldparam [VectorSearch] self
2359+
def initialize(vector_query_combination: nil)
2360+
@vector_query_combination = vector_query_combination
2361+
2362+
yield self if block_given?
2363+
end
2364+
2365+
def to_backend
2366+
{
2367+
vector_query_combination: @vector_query_combination,
2368+
}
2369+
end
2370+
2371+
DEFAULT = VectorSearch.new.freeze
2372+
end
2373+
23512374
# Options for {Couchbase::Cluster#view_query}
23522375
class View < Base
23532376
attr_accessor :scan_consistency # @return [Symbol]

0 commit comments

Comments
 (0)