Skip to content

Commit a6eaa9f

Browse files
Jonah Cohen, Gunnar Kudrjavets
authored and committed
Encode column family id and flags in datadic
Summary: Serialize the column family id, is_reverse_cf, and is_auto_cf in the data dictionary (datadic). This way, when RocksDB-SE is initially loaded, we can fully populate RDBSE_KEYDEF entries without having to wait for an open-table call.
Test Plan: Ran the "drop index" diff after this one; verified that dropped indices persisted after shutdown.
Reviewers: spetrunia, maykov, hermanlee4, yoshinorim
Reviewed By: yoshinorim
Subscribers: jtolmer
Differential Revision: https://reviews.facebook.net/D32955
1 parent f8840f9 commit a6eaa9f

File tree

5 files changed

+91
-74
lines changed

5 files changed

+91
-74
lines changed

storage/rocksdb/ha_rocksdb.cc

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,7 @@ class Primary_key_comparator : public rocksdb::Comparator
713713

714714
/* The following is not needed by RocksDB, but conceptually should be here: */
715715
static ulong get_hashnr(const char *key, size_t key_len);
716-
const char* Name() const { return "RocksDB_SE_v3.2"; }
716+
const char* Name() const { return "RocksDB_SE_v3.3"; }
717717

718718
//TODO: advanced funcs:
719719
// - FindShortestSeparator
@@ -730,7 +730,7 @@ class Reverse_comparator : public rocksdb::Comparator
730730
{
731731
return -bytewise_compare(a,b);
732732
}
733-
const char* Name() const { return "rev:RocksDB_SE_v3.2"; }
733+
const char* Name() const { return "rev:RocksDB_SE_v3.3"; }
734734
void FindShortestSeparator(std::string* start, const rocksdb::Slice& limit) const {}
735735
void FindShortSuccessor(std::string* key) const {}
736736
};
@@ -1330,12 +1330,12 @@ static int rocksdb_init_func(void *p)
13301330
sql_print_error("RocksDB: Error opening instance: %s", err_text.c_str());
13311331
DBUG_RETURN(1);
13321332
}
1333-
cf_manager.init(&cf_names, &cf_handles);
1333+
cf_manager.init(&cf_handles);
13341334

13351335
if (binlog_manager.init(rdb))
13361336
DBUG_RETURN(1);
13371337

1338-
if (ddl_manager.init(rdb))
1338+
if (ddl_manager.init(rdb, &cf_manager))
13391339
DBUG_RETURN(1);
13401340

13411341
stop_background_thread = false;
@@ -1855,17 +1855,7 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked)
18551855

18561856
uint key_len= table->key_info[table->s->primary_key].key_length;
18571857

1858-
const char *comment= table->key_info[table->s->primary_key].comment.str;
1859-
const char *idx_name= table->key_info[table->s->primary_key].name;
1860-
rocksdb::ColumnFamilyHandle *cf_handle;
1861-
bool is_auto_cf;
1862-
if (!(cf_handle= cf_manager.get_cf(comment, fullname.c_ptr(), idx_name,
1863-
&is_auto_cf)))
1864-
DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
1865-
18661858
pk_descr->setup(table); // move this into get_share() ??
1867-
pk_descr->set_cf_handle(cf_handle, is_cf_name_reverse(comment),
1868-
is_auto_cf);
18691859

18701860
uint packed_key_len= pk_descr->max_storage_fmt_length();
18711861

@@ -1880,15 +1870,7 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked)
18801870
if (i == table->s->primary_key) /* Primary key was processed above */
18811871
continue;
18821872

1883-
comment= table->key_info[i].comment.str;
1884-
idx_name= table->key_info[i].name;
1885-
if (!(cf_handle= cf_manager.get_cf(comment, fullname.c_ptr(), idx_name,
1886-
&is_auto_cf)))
1887-
DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
1888-
18891873
key_descr[i]->setup(table); // move this into get_share() ??
1890-
key_descr[i]->set_cf_handle(cf_handle, is_cf_name_reverse(comment),
1891-
is_auto_cf);
18921874

18931875
uint packed_len= key_descr[i]->max_storage_fmt_length();
18941876
if (packed_len > max_packed_sec_key_len)

storage/rocksdb/rdb_cf_manager.cc

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -45,25 +45,23 @@ bool is_cf_name_reverse(const char *name)
4545
static PSI_mutex_key ex_key_cfm;
4646
#endif
4747

48-
void Column_family_manager::init(std::vector<std::string> *names,
49-
std::vector<rocksdb::ColumnFamilyHandle*> *handles)
48+
void Column_family_manager::init(std::vector<rocksdb::ColumnFamilyHandle*> *handles)
5049
{
5150
mysql_mutex_init(ex_key_cfm, &cfm_mutex, MY_MUTEX_INIT_FAST);
52-
DBUG_ASSERT(names->size() == handles->size());
53-
DBUG_ASSERT(names->size() > 0);
51+
DBUG_ASSERT(handles->size() > 0);
5452

5553
default_cf= (*handles)[0];
56-
for (size_t i = 0; i < names->size(); ++i)
57-
cf_map[(*names)[i]]= (*handles)[i];
54+
for (auto cfh : *handles) {
55+
cf_name_map[cfh->GetName()] = cfh;
56+
cf_id_map[cfh->GetID()] = cfh;
57+
}
5858
}
5959

6060

6161
void Column_family_manager::cleanup()
6262
{
63-
ColumnFamilyHandleMap::iterator it;
64-
for (it= cf_map.begin(); it!=cf_map.end(); it++)
65-
{
66-
delete it->second;
63+
for (auto it : cf_name_map) {
64+
delete it.second;
6765
}
6866
mysql_mutex_destroy(&cfm_mutex);
6967
}
@@ -98,7 +96,6 @@ Column_family_manager::get_or_create_cf(rocksdb::DB *rdb, const char *cf_name,
9896
bool *is_automatic)
9997
{
10098
rocksdb::ColumnFamilyHandle* cf_handle;
101-
ColumnFamilyHandleMap::iterator it;
10299

103100
mysql_mutex_lock(&cfm_mutex);
104101
*is_automatic= false;
@@ -116,7 +113,8 @@ Column_family_manager::get_or_create_cf(rocksdb::DB *rdb, const char *cf_name,
116113
*is_automatic= true;
117114
}
118115

119-
if ((it= cf_map.find(cf_name)) != cf_map.end())
116+
auto it = cf_name_map.find(cf_name);
117+
if (it != cf_name_map.end())
120118
cf_handle= it->second;
121119
else
122120
{
@@ -131,10 +129,12 @@ Column_family_manager::get_or_create_cf(rocksdb::DB *rdb, const char *cf_name,
131129
opts.target_file_size_base);
132130

133131
rocksdb::Status s= rdb->CreateColumnFamily(opts, cf_name_str, &cf_handle);
134-
if (s.ok())
135-
cf_map[cf_name_str]= cf_handle;
136-
else
132+
if (s.ok()) {
133+
cf_name_map[cf_handle->GetName()] = cf_handle;
134+
cf_id_map[cf_handle->GetID()] = cf_handle;
135+
} else {
137136
cf_handle= NULL;
137+
}
138138
}
139139
}
140140
mysql_mutex_unlock(&cfm_mutex);
@@ -162,7 +162,6 @@ Column_family_manager::get_cf(const char *cf_name,
162162
bool *is_automatic)
163163
{
164164
rocksdb::ColumnFamilyHandle* cf_handle;
165-
ColumnFamilyHandleMap::iterator it;
166165

167166
*is_automatic= false;
168167
mysql_mutex_lock(&cfm_mutex);
@@ -178,25 +177,35 @@ Column_family_manager::get_cf(const char *cf_name,
178177
*is_automatic= true;
179178
}
180179

181-
if ((it= cf_map.find(cf_name)) != cf_map.end())
182-
cf_handle= it->second;
183-
else
184-
cf_handle= NULL;
180+
auto it = cf_name_map.find(cf_name);
181+
cf_handle = (it != cf_name_map.end()) ? it->second : nullptr;
185182
}
186183
mysql_mutex_unlock(&cfm_mutex);
187184

188185
return cf_handle;
189186
}
190187

188+
rocksdb::ColumnFamilyHandle* Column_family_manager::get_cf(uint32_t id)
189+
{
190+
rocksdb::ColumnFamilyHandle* cf_handle = nullptr;
191+
192+
mysql_mutex_lock(&cfm_mutex);
193+
auto it = cf_id_map.find(id);
194+
if (it != cf_id_map.end())
195+
cf_handle = it->second;
196+
mysql_mutex_unlock(&cfm_mutex);
197+
198+
return cf_handle;
199+
}
200+
191201
std::vector<std::string>
192202
Column_family_manager::get_cf_names(void)
193203
{
194204
std::vector<std::string> names;
195-
ColumnFamilyHandleMap::iterator it;
196205

197206
mysql_mutex_lock(&cfm_mutex);
198-
for (it= cf_map.begin(); it != cf_map.end(); it++) {
199-
names.push_back(it->first);
207+
for (auto it : cf_name_map) {
208+
names.push_back(it.first);
200209
}
201210
mysql_mutex_unlock(&cfm_mutex);
202211
return names;

storage/rocksdb/rdb_cf_manager.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,8 @@ void get_per_index_cf_name(const char *db_table_name, const char *index_name,
4141

4242
class Column_family_manager
4343
{
44-
typedef std::map<std::string, rocksdb::ColumnFamilyHandle*> ColumnFamilyHandleMap;
45-
46-
ColumnFamilyHandleMap cf_map;
44+
std::map<std::string, rocksdb::ColumnFamilyHandle*> cf_name_map;
45+
std::map<uint32_t, rocksdb::ColumnFamilyHandle*> cf_id_map;
4746

4847
rocksdb::ColumnFamilyHandle *default_cf;
4948

@@ -53,8 +52,7 @@ class Column_family_manager
5352
This is called right after the DB::Open() call. The parameters describe column
5453
families that are present in the database. The first CF is the default CF.
5554
*/
56-
void init(std::vector<std::string> *names,
57-
std::vector<rocksdb::ColumnFamilyHandle*> *handles);
55+
void init(std::vector<rocksdb::ColumnFamilyHandle*> *handles);
5856
void cleanup();
5957

6058
/*
@@ -74,6 +72,9 @@ class Column_family_manager
7472
const char *index_name,
7573
bool *is_automatic);
7674

75+
/* Look up cf by id; used by datadic */
76+
rocksdb::ColumnFamilyHandle* get_cf(uint32_t);
77+
7778
/* Used to iterate over column families for show status */
7879
std::vector<std::string> get_cf_names(void);
7980

storage/rocksdb/rdb_datadic.cc

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "my_bit.h"
2525

2626
#include "rdb_datadic.h"
27+
#include "rdb_cf_manager.h"
2728

2829

2930
void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
@@ -36,6 +37,10 @@ void write_int(String *out, uint32 val)
3637
out->append((char*)&buf, 4);
3738
}
3839

40+
void write_byte(String *out, uchar val)
41+
{
42+
out->append((char*)&val, 1);
43+
}
3944

4045
uint32 read_int(char **data)
4146
{
@@ -45,6 +50,13 @@ uint32 read_int(char **data)
4550
return ntohl(buf);
4651
}
4752

53+
uchar read_byte(char **data)
54+
{
55+
uchar buf;
56+
memcpy(&buf, *data, sizeof(uchar));
57+
*data += sizeof(uchar);
58+
return buf;
59+
}
4860

4961
RDBSE_KEYDEF::~RDBSE_KEYDEF()
5062
{
@@ -886,16 +898,28 @@ void _rdbse_store_blob_length(uchar *pos,uint pack_length,uint length)
886898
Write table definition DDL entry.
887899
888900
We write
889-
dbname.tablename -> {index_nr, index_nr, index_nr, ... }
901+
dbname.tablename -> {key_entry, key_entry, key_entry, ... }
902+
903+
Where key entries are a tuple of
904+
( index_nr, column_family_id, flags )
890905
*/
891906

892907
void RDBSE_TABLE_DEF::write_to(rocksdb::DB *rdb_dict, uchar *key, size_t keylen)
893908
{
894-
StringBuffer<32> indexes;
909+
StringBuffer<8 * RDBSE_KEYDEF::PACKED_SIZE> indexes;
910+
indexes.alloc(n_keys * RDBSE_KEYDEF::PACKED_SIZE);
895911

896912
for (uint i=0; i < n_keys; i++)
897913
{
898-
write_int(&indexes, key_descr[i]->index_number);
914+
RDBSE_KEYDEF* kd = key_descr[i];
915+
916+
uchar flags =
917+
(kd->is_reverse_cf ? RDBSE_KEYDEF::REVERSE_CF_FLAG : 0) |
918+
(kd->is_auto_cf ? RDBSE_KEYDEF::AUTO_CF_FLAG : 0);
919+
920+
write_int(&indexes, kd->index_number);
921+
write_int(&indexes, kd->get_cf()->GetID());
922+
write_byte(&indexes, flags);
899923
}
900924
rocksdb::Slice skey((char*)key, keylen);
901925
rocksdb::Slice svalue(indexes.c_ptr(), indexes.length());
@@ -921,7 +945,7 @@ void Table_ddl_manager::free_hash_elem(void* data)
921945
}
922946

923947

924-
bool Table_ddl_manager::init(rocksdb::DB *rdb_dict)
948+
bool Table_ddl_manager::init(rocksdb::DB *rdb_dict, Column_family_manager *cf_manager)
925949
{
926950
mysql_rwlock_init(0, &rwlock);
927951
(void) my_hash_init(&ddl_hash, /*system_charset_info*/&my_charset_bin, 32,0,0,
@@ -964,19 +988,13 @@ bool Table_ddl_manager::init(rocksdb::DB *rdb_dict)
964988

965989
// Now, read the DDLs.
966990

967-
if (val.size() < RDBSE_KEYDEF::INDEX_NUMBER_SIZE)
968-
{
969-
sql_print_error("RocksDB: Table_store: no keys defined in %*s",
970-
(int)key.size(), key.data());
971-
return true;
972-
}
973-
if (val.size() % RDBSE_KEYDEF::INDEX_NUMBER_SIZE)
991+
if (val.size() % RDBSE_KEYDEF::PACKED_SIZE)
974992
{
975993
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
976994
tdef->dbname_tablename.c_ptr_safe());
977995
return true;
978996
}
979-
tdef->n_keys= val.size() / RDBSE_KEYDEF::INDEX_NUMBER_SIZE;
997+
tdef->n_keys= val.size() / RDBSE_KEYDEF::PACKED_SIZE;
980998
if (!(tdef->key_descr= new RDBSE_KEYDEF*[tdef->n_keys]))
981999
return true;
9821000

@@ -987,14 +1005,20 @@ bool Table_ddl_manager::init(rocksdb::DB *rdb_dict)
9871005
for (uint keyno=0; ptr < ptr_end; keyno++)
9881006
{
9891007
int index_number= read_int(&ptr);
1008+
int cf_id= read_int(&ptr);
1009+
uchar flags = read_byte(&ptr);
1010+
1011+
rocksdb::ColumnFamilyHandle* cfh = cf_manager->get_cf(cf_id);
1012+
DBUG_ASSERT(cfh != nullptr);
9901013

9911014
/*
9921015
We can't fully initialize RDBSE_KEYDEF object here, because full
9931016
initialization requires that there is an open TABLE* where we could
9941017
look at Field* objects and set max_length and other attributes
9951018
*/
996-
tdef->key_descr[keyno]= new RDBSE_KEYDEF(index_number, keyno, NULL,
997-
false, false);
1019+
tdef->key_descr[keyno]= new RDBSE_KEYDEF(index_number, keyno, cfh,
1020+
flags & RDBSE_KEYDEF::REVERSE_CF_FLAG,
1021+
flags & RDBSE_KEYDEF::AUTO_CF_FLAG);
9981022

9991023
/* Keep track of what was the last index number we saw */
10001024
if (max_number < index_number)

storage/rocksdb/rdb_datadic.h

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
class RDBSE_KEYDEF;
1818
class Field_pack_info;
19+
class Column_family_manager;
1920

2021
inline void store_index_number(uchar *dst, uint32 number)
2122
{
@@ -225,11 +226,19 @@ class RDBSE_KEYDEF
225226
maxlength(0) // means 'not intialized'
226227
{
227228
store_index_number(index_number_storage_form, indexnr_arg);
229+
DBUG_ASSERT(cf_handle_arg != nullptr);
228230
}
229231
~RDBSE_KEYDEF();
230232

231233
enum {
232-
INDEX_NUMBER_SIZE= 4
234+
INDEX_NUMBER_SIZE= 4,
235+
PACKED_SIZE = 9, // two ints + 1 uchar
236+
};
237+
238+
// bit flags for combining bools when writing to disk
239+
enum {
240+
REVERSE_CF_FLAG = 1,
241+
AUTO_CF_FLAG = 2,
233242
};
234243

235244
enum {
@@ -239,14 +248,6 @@ class RDBSE_KEYDEF
239248
};
240249

241250
void setup(TABLE *table);
242-
void set_cf_handle(rocksdb::ColumnFamilyHandle* cf_handle_arg,
243-
bool is_reverse_cf_arg,
244-
bool is_auto_cf_arg)
245-
{
246-
cf_handle= cf_handle_arg;
247-
is_reverse_cf= is_reverse_cf_arg;
248-
is_auto_cf= is_auto_cf_arg;
249-
}
250251

251252
rocksdb::ColumnFamilyHandle *get_cf() { return cf_handle; }
252253

@@ -457,7 +458,7 @@ class Table_ddl_manager
457458

458459
public:
459460
/* Load the data dictionary from on-disk storage */
460-
bool init(rocksdb::DB *rdb_dict);
461+
bool init(rocksdb::DB *rdb_dict, Column_family_manager *cf_manager);
461462

462463
void cleanup();
463464

0 commit comments

Comments
 (0)