Skip to content

Commit 231aafd

Browse files
yoshinorimGunnar Kudrjavets
authored andcommitted
Skip using bloom filter on prefix bloom filter + prefix lookup + Prev()
Summary: Currently RocksDB does not support bloom filter for prefix lookup + descending scan, and results may be unexpected. This diff disables using bloom filter on prefix lookup + descending scan. This diff also turns off bloom filter on index_first and index_last, even if order is ascending. In the future RocksDB will support bloom filter for prefix + descending scan. Once it is supported, this diff will need to be modified or reverted. Test Plan: mtr, and extending bloomfilter test case Reviewers: maykov, jonahcohen, spetrunia, rven, sdong, hermanlee4 Reviewed By: hermanlee4 Subscribers: sdong, rven Differential Revision: https://reviews.facebook.net/D33597
1 parent a6eaa9f commit 231aafd

File tree

2 files changed

+72
-13
lines changed

2 files changed

+72
-13
lines changed

storage/rocksdb/ha_rocksdb.cc

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
5555
void* background_thread(void*);
5656
bool can_use_bloom_filter(RDBSE_KEYDEF *keydef,
5757
const rocksdb::Slice &eq_cond,
58-
const bool use_all_keys);
58+
const bool use_all_keys,
59+
bool is_ascending);
5960
static int rocksdb_normalize_tablename(const char *tablename,
6061
StringBuffer<256> *strbuf);
6162

@@ -2314,7 +2315,7 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key,
23142315
This will open the iterator and position it at a record that's equal or
23152316
greater than the lookup tuple.
23162317
*/
2317-
setup_index_scan(kd, &slice, use_all_keys);
2318+
setup_index_scan(kd, &slice, use_all_keys, is_ascending(kd, find_flag));
23182319
bool move_forward= true;
23192320

23202321

@@ -3115,7 +3116,8 @@ int ha_rocksdb::write_row(uchar *buf)
31153116

31163117
void ha_rocksdb::setup_index_scan(RDBSE_KEYDEF *keydef,
31173118
rocksdb::Slice *slice,
3118-
bool use_all_keys)
3119+
bool use_all_keys,
3120+
bool is_ascending)
31193121
{
31203122
Rdb_transaction *trx= get_or_create_trx(table->in_use);
31213123

@@ -3128,7 +3130,7 @@ void ha_rocksdb::setup_index_scan(RDBSE_KEYDEF *keydef,
31283130
rocksdb::ReadOptions options;
31293131
if (!lock_rows)
31303132
options.snapshot= trx->snapshot;
3131-
if (!can_use_bloom_filter(keydef, *slice, use_all_keys))
3133+
if (!can_use_bloom_filter(keydef, *slice, use_all_keys, is_ascending))
31323134
options.total_order_seek= true;
31333135
rocksdb::Iterator* rocksdb_it= rdb->NewIterator(options, keydef->get_cf());
31343136
scan_it= new Apply_changes_iter;
@@ -3910,6 +3912,54 @@ void ha_rocksdb::get_auto_increment(ulonglong offset, ulonglong increment,
39103912
mysql_mutex_unlock(&tbl_def->mutex);
39113913
}
39123914

3915+
/**
3916+
Checking if an index is used for ascending scan or not
3917+
3918+
@detail
3919+
Currently RocksDB does not support bloom filter for
3920+
prefix lookup + descending scan, but supports bloom filter for
3921+
prefix lookup + ascending scan. This function returns true if
3922+
the scan pattern is absolutely ascending.
3923+
@param keydef
3924+
@param find_flag
3925+
*/
3926+
bool ha_rocksdb::is_ascending(RDBSE_KEYDEF *keydef, enum ha_rkey_function find_flag)
3927+
{
3928+
bool is_ascending= false;
3929+
switch (find_flag) {
3930+
case HA_READ_KEY_EXACT:
3931+
case HA_READ_PREFIX:
3932+
{
3933+
is_ascending= true;
3934+
break;
3935+
}
3936+
case HA_READ_KEY_OR_NEXT:
3937+
case HA_READ_AFTER_KEY:
3938+
{
3939+
if (keydef->is_reverse_cf)
3940+
is_ascending= false;
3941+
else
3942+
is_ascending= true;
3943+
break;
3944+
}
3945+
case HA_READ_KEY_OR_PREV:
3946+
case HA_READ_BEFORE_KEY:
3947+
case HA_READ_PREFIX_LAST:
3948+
case HA_READ_PREFIX_LAST_OR_PREV:
3949+
{
3950+
if (keydef->is_reverse_cf)
3951+
is_ascending= true;
3952+
else
3953+
is_ascending= false;
3954+
break;
3955+
}
3956+
default:
3957+
is_ascending= false;
3958+
}
3959+
return is_ascending;
3960+
}
3961+
3962+
39133963
#define SHOW_FNAME(name) rocksdb_show_##name
39143964

39153965
#define DEF_SHOW_FUNC(name, key) \
@@ -4186,28 +4236,36 @@ void* background_thread(void*)
41864236
*/
41874237
bool can_use_bloom_filter(RDBSE_KEYDEF *keydef,
41884238
const rocksdb::Slice &eq_cond,
4189-
const bool use_all_keys)
4239+
const bool use_all_keys,
4240+
bool is_ascending)
41904241
{
41914242
bool can_use= false;
41924243
rocksdb::Options opt = rdb->GetOptions(keydef->get_cf());
41934244
if (opt.prefix_extractor)
41944245
{
41954246
/*
41964247
This is an optimized use case for CappedPrefixTransform.
4197-
If eq_cond length >= prefix extractor length, it is
4198-
always possible to use bloom filter. Keys longer than the
4199-
capped prefix length will be truncated down to the capped length
4200-
and the resulting key is added to the bloom filter.
4248+
If eq_cond length >= prefix extractor length and if
4249+
all keys are used for equal lookup, it is
4250+
always possible to use bloom filter.
4251+
4252+
Prefix bloom filter can't be used on descending scan with
4253+
prefix lookup (i.e. WHERE id1=1 ORDER BY id2 DESC), because of
4254+
RocksDB's limitation. On ascending (or not sorting) scan,
4255+
keys longer than the capped prefix length will be truncated down
4256+
to the capped length and the resulting key is added to the bloom filter.
42014257
42024258
Keys shorter than the capped prefix length will be added to
42034259
the bloom filter. When keys are looked up, key conditionals
42044260
longer than the capped length can be used; key conditionals
42054261
shorter require all parts of the key to be available
42064262
for the short key match.
42074263
*/
4208-
if (opt.prefix_extractor->SameResultWhenAppended(eq_cond))
4264+
if (use_all_keys && opt.prefix_extractor->InRange(eq_cond))
42094265
can_use= true;
4210-
else if (use_all_keys && opt.prefix_extractor->InRange(eq_cond))
4266+
else if (!is_ascending)
4267+
can_use= false;
4268+
else if (opt.prefix_extractor->SameResultWhenAppended(eq_cond))
42114269
can_use= true;
42124270
else
42134271
can_use= false;

storage/rocksdb/ha_rocksdb.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,11 @@ class ha_rocksdb: public handler
171171
int secondary_index_read(int keyno, uchar *buf);
172172
void setup_index_scan(RDBSE_KEYDEF *keydef, rocksdb::Slice *slice)
173173
{
174-
setup_index_scan(keydef, slice, false);
174+
setup_index_scan(keydef, slice, false, false);
175175
}
176+
bool is_ascending(RDBSE_KEYDEF *keydef, enum ha_rkey_function find_flag);
176177
void setup_index_scan(RDBSE_KEYDEF *keydef, rocksdb::Slice *slice,
177-
bool use_all_keys);
178+
bool use_all_keys, bool is_ascending);
178179
int get_row_by_rowid(uchar *buf, const char *pk_tuple, uint pk_tuple_size);
179180

180181
void update_auto_incr_val();

0 commit comments

Comments
 (0)