Skip to content

Commit c019294

Browse files
author
Libing Song
committed
WL#4618 RBR: extended table metadata in the binary log
This patch extends Table Map Event. It appends some new fields for more metadata. The new metadata includes: - Signedness of Numeric Columns - Character Set of Character Columns and Binary Columns - Column Name - String Value of SET Columns - String Value of ENUM Columns - Primary Key - Geometry Type Some of them are optional, the patch introduces a GLOBAL system variable to control it. It is binlog_row_metadata. - Scope: GLOBAL - Dynamic: Yes - Type: ENUM - Values: {MINIMAL, FULL} - Default: MINIMAL Only Signedness, character set and geometry type are logged if it is MINIMAL. Otherwise all of them are logged.
1 parent 8453159 commit c019294

17 files changed

+2122
-12
lines changed

client/client_priv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ enum options_client
106106
OPT_CONNECTION_SERVER_ID,
107107
OPT_TLS_VERSION,
108108
OPT_SSL_MODE,
109+
OPT_PRINT_TABLE_METADATA,
109110
/* Add new option above this */
110111
OPT_MAX_CLIENT_OPTION
111112
};

client/mysqlbinlog.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,8 @@ Checkable_rwlock *global_sid_lock= NULL;
370370
Gtid_set *gtid_set_included= NULL;
371371
Gtid_set *gtid_set_excluded= NULL;
372372

373+
static bool opt_print_table_metadata;
374+
373375
/**
374376
Pointer to the Format_description_log_event of the currently active binlog.
375377
@@ -1706,6 +1708,10 @@ static struct my_option my_long_options[] =
17061708
"Identifiers were provided.",
17071709
&opt_exclude_gtids_str, &opt_exclude_gtids_str, 0,
17081710
GET_STR_ALLOC, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
1711+
{"print-table-metadata", OPT_PRINT_TABLE_METADATA,
1712+
"Print metadata stored in Table_map_log_event",
1713+
&opt_print_table_metadata, &opt_print_table_metadata, 0,
1714+
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
17091715
{0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
17101716
};
17111717

@@ -2092,6 +2098,7 @@ static Exit_status dump_multiple_logs(int argc, char **argv)
20922098
print_event_info.short_form= short_form;
20932099
print_event_info.base64_output_mode= opt_base64_output_mode;
20942100
print_event_info.skip_gtids= opt_skip_gtids;
2101+
print_event_info.print_table_metadata= opt_print_table_metadata;
20952102

20962103
// Dump all logs.
20972104
my_off_t save_stop_position= stop_position;

libbinlogevents/include/rows_event.h

Lines changed: 174 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
1+
/* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
22
33
This program is free software; you can redistribute it and/or modify
44
it under the terms of the GNU General Public License as published by
@@ -158,6 +158,18 @@ namespace binary_log
158158
on. </td>
159159
</tr>
160160
161+
<tr>
162+
<td>optional metadata fields</td>
163+
<td>optional metadata fields are stored in Type, Length, Value(TLV) format.
164+
Type takes 1 byte. Length is a packed integer value. Value takes
165+
Length bytes.
166+
</td>
167+
<td>There are some optional metadata defined. They are listed in the table
168+
@ref Table_table_map_event_optional_metadata. Optional metadata fields
169+
follow null_bits. Whether an optional metadata field is binlogged is decided by the
170+
server. The order is not defined, so they can be binlogged in any order.
171+
</td>
172+
</tr>
161173
</table>
162174
163175
The table below lists all column types, along with the numerical
@@ -363,6 +375,88 @@ namespace binary_log
363375
</tr>
364376
365377
</table>
378+
379+
The table below lists all optional metadata types, along with the numerical
380+
identifier for it and the size and interpretation of meta-data used
381+
to describe the type.
382+
383+
@anchor Table_table_map_event_optional_metadata
384+
<table>
385+
<caption>Table_map_event optional metadata types: numerical identifier and
386+
metadata. Optional metadata fields are stored in TLV fields.
387+
Format of values are described in this table. </caption>
388+
<tr>
389+
<th>Type</th>
390+
<th>Description</th>
391+
<th>Format</th>
392+
</tr>
393+
<tr>
394+
<td>SIGNEDNESS</td>
395+
<td>signedness of numeric columns</td>
396+
<td>For each numeric column, a bit indicates whether the numeric column has
397+
unsigned flag. 1 means it is unsigned. The number of bytes needed for this is
398+
int((column_count + 7) / 8). The order is the same as the order of column_type
399+
field.</td>
400+
</tr>
401+
<tr>
402+
<td>DEFAULT_CHARSET</td>
403+
<td>Charsets of character columns. It has a default charset for the case
404+
that most of character columns have same charset and the most used charset
405+
is binlogged as default charset. Collation numbers are binlogged for
406+
identifying charsets. They are stored in packed length format. </td>
407+
<td>Default charset's collation is logged first. The charsets which are not
408+
same to default charset are logged following default charset. They are
409+
logged as column index and charset collation number pair sequence. The
410+
column index is counted only in all character columns. The order is same to
411+
the order of column_type
412+
field. </td>
413+
</tr>
414+
<tr>
415+
<td>COLUMN_CHARSET</td>
416+
<td>Charsets of character columns. For the case that most of columns have
417+
different charsets, this field is logged. It is never logged with
418+
DEFAULT_CHARSET together.</td>
419+
<td>It is a collation number sequence for all character columns.</td>
420+
</tr>
421+
<tr>
422+
<td>COLUMN_NAME</td>
423+
<td>Names of columns</td>
424+
<td>A sequence of column names. For each column name, 1 byte string length
425+
followed by a string without null terminator. </td>
426+
</tr>
427+
<tr>
428+
<td>SET_STR_VALUE</td>
429+
<td>The string values of SET columns</td>
430+
<td>For each SET column, a pack_length representing the value count is followed by
431+
a sequence of length and string pairs. length is pack_length and string
432+
has no null terminator.</td>
433+
</tr>
434+
<tr>
435+
<td>ENUM_STR_VALUE</td>
436+
<td>The string values of ENUM columns</td>
437+
<td>Format is same to SET_STR_VALUE</td>
438+
</tr>
439+
<tr>
440+
<td>GEOMETRY_TYPE</td>
441+
<td>The real type of geometry columns</td>
442+
<td>A sequence of real type of geometry columns are stored in pack_length
443+
format. </td>
444+
</tr>
445+
<tr>
446+
<td>SIMPLE_PRIMARY_KEY</td>
447+
<td>The primary key without any prefix</td>
448+
<td>A sequence of column indexes. The indexes are stored in pack_length
449+
format.</td>
450+
</tr>
451+
<tr>
452+
<td>PRIMARY_KEY_WITH_PREFIX</td>
453+
<td>The primary key with some prefix. It doesn't appear with
454+
SIMPLE_PRIMARY_KEY together. </td>
455+
<td>A sequence of column index and prefix length pairs. Both
456+
column index and prefix length are in pack_length format. A prefix length
457+
of 0 means the whole column value is used.</td>
458+
</tr>
459+
</table>
366460
*/
367461
class Table_map_event: public Binary_log_event
368462
{
@@ -376,6 +470,77 @@ class Table_map_event: public Binary_log_event
376470

377471
typedef uint16_t flag_set;
378472

473+
/**
474+
DEFAULT_CHARSET and COLUMN_CHARSET don't appear together. They are just two
475+
ways to pack character set information. When binlogging, the character set
476+
is logged in whichever way occupies less storage.
477+
478+
SIMPLE_PRIMARY_KEY and PRIMARY_KEY_WITH_PREFIX don't appear together.
479+
SIMPLE_PRIMARY_KEY is for the primary keys which only use whole values of
480+
pk columns. PRIMARY_KEY_WITH_PREFIX is
481+
for the primary keys which use only a part (prefix) of the values of pk columns.
482+
*/
483+
enum Optional_metadata_field_type
484+
{
485+
SIGNEDNESS= 1, // UNSIGNED flag of numeric columns
486+
DEFAULT_CHARSET, // Default character set of string columns
487+
COLUMN_CHARSET, // Character set of string columns
488+
COLUMN_NAME, // Names of columns
489+
SET_STR_VALUE, // String value of SET columns
490+
ENUM_STR_VALUE, // String value of ENUM columns
491+
GEOMETRY_TYPE, // Real type of geometry columns
492+
SIMPLE_PRIMARY_KEY, // Primary key without prefix
493+
PRIMARY_KEY_WITH_PREFIX // Primary key with prefix
494+
};
495+
496+
/**
497+
Optional_metadata_fields organizes m_optional_metadata into a structured
498+
format which is easy to access.
499+
*/
500+
struct Optional_metadata_fields
501+
{
502+
typedef std::pair<unsigned int, unsigned int> uint_pair;
503+
typedef std::vector<std::string> str_vector;
504+
505+
struct Default_charset
506+
{
507+
Default_charset() : default_charset(0) {}
508+
bool empty() const { return default_charset == 0; }
509+
510+
// Default charset for the columns which are not in charset_pairs.
511+
unsigned int default_charset;
512+
513+
/* The uint_pair means <column index, column charset number>. */
514+
std::vector<uint_pair> charset_pairs;
515+
};
516+
517+
// Content of DEFAULT_CHARSET field is converted into Default_charset.
518+
Default_charset m_default_charset;
519+
std::vector<bool> m_signedness;
520+
// Character set number of every column
521+
std::vector<unsigned int> m_column_charset;
522+
std::vector<std::string> m_column_name;
523+
// each str_vector stores values of one enum/set column
524+
std::vector<str_vector> m_enum_str_value;
525+
std::vector<str_vector> m_set_str_value;
526+
std::vector<unsigned int> m_geometry_type;
527+
/*
528+
The uint_pair means <column index, prefix length>. Prefix length is 0 if
529+
whole column value is used.
530+
*/
531+
std::vector<uint_pair> m_primary_key;
532+
533+
/**
534+
Parses the optional metadata fields and populates the member variables above.
535+
536+
@param[in] optional_metadata points to the beginning of the optional metadata
537+
fields in table_map_event.
538+
@param[in] optional_metadata_len length of optional_metadata field.
539+
*/
540+
Optional_metadata_fields(unsigned char* optional_metadata,
541+
unsigned int optional_metadata_len);
542+
};
543+
379544
/**
380545
<pre>
381546
The buffer layout for fixed data part is as follows:
@@ -418,7 +583,9 @@ class Table_map_event: public Binary_log_event
418583
m_colcnt(colcnt),
419584
m_field_metadata_size(0),
420585
m_field_metadata(0),
421-
m_null_bits(0)
586+
m_null_bits(0),
587+
m_optional_metadata_len(0),
588+
m_optional_metadata(NULL)
422589
{
423590
if (dbnam)
424591
m_dbnam= std::string(dbnam, m_dblen);
@@ -449,13 +616,17 @@ class Table_map_event: public Binary_log_event
449616
unsigned long m_field_metadata_size;
450617
unsigned char* m_field_metadata; /** field metadata */
451618
unsigned char* m_null_bits;
619+
unsigned int m_optional_metadata_len;
620+
unsigned char* m_optional_metadata;
452621

453622
Table_map_event()
454623
: Binary_log_event(TABLE_MAP_EVENT),
455624
m_coltype(0),
456625
m_field_metadata_size(0),
457626
m_field_metadata(0),
458-
m_null_bits(0)
627+
m_null_bits(0),
628+
m_optional_metadata_len(0),
629+
m_optional_metadata(NULL)
459630
{}
460631

461632
unsigned long long get_table_id()

0 commit comments

Comments
 (0)