Skip to content

Commit a5a2224

Browse files
committed
Fix Bug#20783098 INNODB_CHECKSUM_ALGORITHM=CRC32 IS NOT BYTE ORDER AGNOSTIC
The CRC32 checksum generation code interprets portions of the byte string to checksum as an 8-byte integer so that it can process 8 bytes at a time (rather than 1 byte at a time). For this, the code uses the native byte order of the machine: crc ^= *(ib_uint64_t*) buf; and then does numerical calculations with the result (e.g. crc >> N). Thus the resulting checksum depends on the byte order of the machine and is different on big and little endian machines. This means that files written with --innodb-checksum-algorithm=crc32/strict_crc32 on big (little) endian machines are not readable on little (big) endian machines because the checksum, though valid, is not recognized. The simplest solution would be to start writing only e.g. big endian checksums and recognize only such ones, but this would introduce an unacceptable backwards incompatibility. The solution implemented is to recognize both big and little endian CRC32 checksums during verification, while first calculating and checking the little endian one. Swapping the byteorder in order to calculate "the other" CRC32 checksum slows down the checksum calculation by about 1-2% (e.g. when recognizing big-endian-CRC32 on little endian machines or little-endian-CRC32 on big endian machines). When generating the checksum (when writing to disk) we now always use little endian byteorder (no change on little endian machines, and an extra step of swapping the byteorder on big-endian machines). Reviewed-by: Debarun Banerjee <[email protected]> RB: 8781
1 parent 5d15c3d commit a5a2224

File tree

19 files changed

+3006
-242
lines changed

19 files changed

+3006
-242
lines changed

extra/innochecksum.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,9 @@ update_checksum(
457457

458458
if (iscompressed) {
459459
/* page is compressed */
460-
checksum = page_zip_calc_checksum(page, physical_page_size,
461-
static_cast<srv_checksum_algorithm_t>(write_check));
460+
checksum = page_zip_calc_checksum(
461+
page, physical_page_size,
462+
static_cast<srv_checksum_algorithm_t>(write_check));
462463

463464
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
464465
if (is_log_enabled) {
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
CREATE TABLE crc32_big_endian (a VARCHAR(16)) ENGINE=INNODB;
2+
CREATE TABLE crc32_little_endian (a VARCHAR(16)) ENGINE=INNODB;
3+
ALTER TABLE crc32_big_endian DISCARD TABLESPACE;
4+
ALTER TABLE crc32_little_endian DISCARD TABLESPACE;
5+
ALTER TABLE crc32_big_endian IMPORT TABLESPACE;
6+
ALTER TABLE crc32_little_endian IMPORT TABLESPACE;
7+
SELECT * FROM crc32_big_endian;
8+
a
9+
big 1
10+
big 2
11+
big 3
12+
SELECT * FROM crc32_little_endian;
13+
a
14+
little 1
15+
little 2
16+
little 3
17+
DROP TABLE crc32_big_endian;
18+
DROP TABLE crc32_little_endian;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#
2+
# Make sure that files created with --innodb-checksum-algorithm=crc32
3+
# on both big and little endian machines are readable. The two .ibd files
4+
# are precreated in crc32_endianness.zip which is extracted below.
5+
#
6+
7+
-- source include/have_innodb.inc
8+
# Since we import a pre-created tablespace with 16k page size, we would get
9+
# this on InnoDB configured with non-16k page size:
10+
# ALTER TABLE crc32_big_endian IMPORT TABLESPACE:
11+
# Schema mismatch (Tablespace to be imported has a different page size than
12+
# this server. Server page size is 4096, whereas tablespace page size is 16384)
13+
-- source include/have_innodb_16k.inc
14+
15+
CREATE TABLE crc32_big_endian (a VARCHAR(16)) ENGINE=INNODB;
16+
CREATE TABLE crc32_little_endian (a VARCHAR(16)) ENGINE=INNODB;
17+
18+
ALTER TABLE crc32_big_endian DISCARD TABLESPACE;
19+
ALTER TABLE crc32_little_endian DISCARD TABLESPACE;
20+
21+
-- exec unzip -qo $MYSQL_TEST_DIR/suite/innodb/t/crc32_endianness.zip -d $MYSQLTEST_VARDIR/tmp/crc32_endianness
22+
23+
-- let DATADIR = `SELECT @@datadir`
24+
25+
-- copy_file $MYSQLTEST_VARDIR/tmp/crc32_endianness/crc32_big_endian.ibd $DATADIR/test/crc32_big_endian.ibd
26+
-- copy_file $MYSQLTEST_VARDIR/tmp/crc32_endianness/crc32_big_endian.cfg $DATADIR/test/crc32_big_endian.cfg
27+
-- copy_file $MYSQLTEST_VARDIR/tmp/crc32_endianness/crc32_little_endian.ibd $DATADIR/test/crc32_little_endian.ibd
28+
-- copy_file $MYSQLTEST_VARDIR/tmp/crc32_endianness/crc32_little_endian.cfg $DATADIR/test/crc32_little_endian.cfg
29+
30+
ALTER TABLE crc32_big_endian IMPORT TABLESPACE;
31+
ALTER TABLE crc32_little_endian IMPORT TABLESPACE;
32+
33+
SELECT * FROM crc32_big_endian;
34+
SELECT * FROM crc32_little_endian;
35+
36+
DROP TABLE crc32_big_endian;
37+
DROP TABLE crc32_little_endian;
1.86 KB
Binary file not shown.

storage/innobase/buf/buf0buf.cc

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -561,20 +561,34 @@ buf_page_is_checksum_valid_crc32(
561561
#endif /* UNIV_INNOCHECKSUM */
562562
)
563563
{
564-
uint32_t crc32 = buf_calc_page_crc32(read_buf);
564+
const uint32_t crc32 = buf_calc_page_crc32(read_buf);
565565

566566
#ifdef UNIV_INNOCHECKSUM
567567
if (is_log_enabled
568568
&& curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
569569
fprintf(log_file, "page::%" PRIuMAX ";"
570570
" crc32 calculated = %u;"
571571
" recorded checksum field1 = %lu recorded"
572-
" checksum field2 =%lu\n", page_no, crc32,
573-
checksum_field1, checksum_field2);
572+
" checksum field2 =%lu\n", page_no,
573+
crc32, checksum_field1, checksum_field2);
574574
}
575575
#endif /* UNIV_INNOCHECKSUM */
576576

577-
return(checksum_field1 == crc32 && checksum_field2 == crc32);
577+
if (checksum_field1 != checksum_field2) {
578+
return(false);
579+
}
580+
581+
if (checksum_field1 == crc32) {
582+
return(true);
583+
}
584+
585+
const uint32_t crc32_legacy = buf_calc_page_crc32(read_buf, true);
586+
587+
if (checksum_field1 == crc32_legacy) {
588+
return(true);
589+
}
590+
591+
return(false);
578592
}
579593

580594
/** Checks if the page is in innodb checksum format.
@@ -1094,6 +1108,10 @@ buf_page_print(
10941108
<< page_zip_calc_checksum(
10951109
read_buf, page_size.physical(),
10961110
SRV_CHECKSUM_ALGORITHM_CRC32)
1111+
<< "/"
1112+
<< page_zip_calc_checksum(
1113+
read_buf, page_size.physical(),
1114+
SRV_CHECKSUM_ALGORITHM_CRC32, true)
10971115
<< ", "
10981116
<< buf_checksum_algorithm_name(
10991117
SRV_CHECKSUM_ALGORITHM_INNODB)
@@ -1118,13 +1136,19 @@ buf_page_print(
11181136
read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
11191137

11201138
} else {
1139+
const uint32_t crc32 = buf_calc_page_crc32(read_buf);
1140+
1141+
const uint32_t crc32_legacy = buf_calc_page_crc32(read_buf,
1142+
true);
1143+
11211144
ib::info() << "Uncompressed page, stored checksum in field1 "
11221145
<< mach_read_from_4(
11231146
read_buf + FIL_PAGE_SPACE_OR_CHKSUM)
11241147
<< ", calculated checksums for field1: "
11251148
<< buf_checksum_algorithm_name(
11261149
SRV_CHECKSUM_ALGORITHM_CRC32) << " "
1127-
<< buf_calc_page_crc32(read_buf) << ", "
1150+
<< crc32 << "/" << crc32_legacy
1151+
<< ", "
11281152
<< buf_checksum_algorithm_name(
11291153
SRV_CHECKSUM_ALGORITHM_INNODB) << " "
11301154
<< buf_calc_page_new_checksum(read_buf)
@@ -1138,7 +1162,7 @@ buf_page_print(
11381162
<< ", calculated checksums for field2: "
11391163
<< buf_checksum_algorithm_name(
11401164
SRV_CHECKSUM_ALGORITHM_CRC32) << " "
1141-
<< buf_calc_page_crc32(read_buf)
1165+
<< crc32 << "/" << crc32_legacy
11421166
<< ", "
11431167
<< buf_checksum_algorithm_name(
11441168
SRV_CHECKSUM_ALGORITHM_INNODB) << " "
@@ -3652,6 +3676,10 @@ buf_zip_decompress(
36523676
<< ", crc32: "
36533677
<< page_zip_calc_checksum(
36543678
frame, size, SRV_CHECKSUM_ALGORITHM_CRC32)
3679+
<< "/"
3680+
<< page_zip_calc_checksum(
3681+
frame, size, SRV_CHECKSUM_ALGORITHM_CRC32,
3682+
true)
36553683
<< " innodb: "
36563684
<< page_zip_calc_checksum(
36573685
frame, size, SRV_CHECKSUM_ALGORITHM_INNODB)

storage/innobase/buf/buf0checksum.cc

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,20 @@ ha_innodb.cc:12251: error: cannot convert 'srv_checksum_algorithm_t*' to
4040
'long unsigned int*' in initialization */
4141
ulong srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
4242

43-
/********************************************************************//**
44-
Calculates a page CRC32 which is stored to the page when it is written
45-
to a file. Note that we must be careful to calculate the same value on
46-
32-bit and 64-bit architectures.
43+
/** Calculates the CRC32 checksum of a page. The value is stored to the page
44+
when it is written to a file and also checked for a match when reading from
45+
the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian
46+
variants. Note that we must be careful to calculate the same value on 32-bit
47+
and 64-bit architectures.
48+
@param[in] page buffer page (UNIV_PAGE_SIZE bytes)
49+
@param[in] use_legacy_big_endian if true then use big endian
50+
byteorder when converting byte strings to integers
4751
@return checksum */
48-
ib_uint32_t
52+
uint32_t
4953
buf_calc_page_crc32(
50-
/*================*/
51-
const byte* page) /*!< in: buffer page */
54+
const byte* page,
55+
bool use_legacy_big_endian /* = false */)
5256
{
53-
ib_uint32_t checksum;
54-
5557
/* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
5658
FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written outside the buffer pool
5759
to the first pages of data files, we have to skip them in the page
@@ -60,13 +62,19 @@ buf_calc_page_crc32(
6062
checksum is stored, and also the last 8 bytes of page because
6163
there we store the old formula checksum. */
6264

63-
checksum = ut_crc32(page + FIL_PAGE_OFFSET,
64-
FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
65-
^ ut_crc32(page + FIL_PAGE_DATA,
66-
UNIV_PAGE_SIZE - FIL_PAGE_DATA
67-
- FIL_PAGE_END_LSN_OLD_CHKSUM);
65+
ut_crc32_func_t crc32_func = use_legacy_big_endian
66+
? ut_crc32_legacy_big_endian
67+
: ut_crc32;
6868

69-
return(checksum);
69+
const uint32_t c1 = crc32_func(
70+
page + FIL_PAGE_OFFSET,
71+
FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET);
72+
73+
const uint32_t c2 = crc32_func(
74+
page + FIL_PAGE_DATA,
75+
UNIV_PAGE_SIZE - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM);
76+
77+
return(c1 ^ c2);
7078
}
7179

7280
/********************************************************************//**

storage/innobase/buf/buf0flu.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ buf_flush_update_zip_checksum(
807807
{
808808
ut_a(size > 0);
809809

810-
ib_uint32_t checksum = page_zip_calc_checksum(
810+
const uint32_t checksum = page_zip_calc_checksum(
811811
page, size,
812812
static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm));
813813

storage/innobase/buf/buf0lru.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2034,8 +2034,6 @@ buf_LRU_free_page(
20342034

20352035
if (b != NULL) {
20362036

2037-
ib_uint32_t checksum;
2038-
20392037
/* Compute and stamp the compressed page
20402038
checksum while not holding any mutex. The
20412039
block is already half-freed
@@ -2045,7 +2043,7 @@ buf_LRU_free_page(
20452043

20462044
ut_ad(b->size.is_compressed());
20472045

2048-
checksum = page_zip_calc_checksum(
2046+
const uint32_t checksum = page_zip_calc_checksum(
20492047
b->zip.data,
20502048
b->size.physical(),
20512049
static_cast<srv_checksum_algorithm_t>(

storage/innobase/dict/dict0mem.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -715,10 +715,9 @@ void
715715
dict_mem_init(void)
716716
{
717717
/* Initialize a randomly distributed temporary file number */
718-
ib_uint32_t now = static_cast<ib_uint32_t>(ut_time());
718+
ib_uint32_t now = static_cast<ib_uint32_t>(ut_time());
719719

720-
const byte* buf = reinterpret_cast<const byte*>(&now);
721-
ut_ad(ut_crc32 != NULL);
720+
const byte* buf = reinterpret_cast<const byte*>(&now);
722721

723722
dict_temp_file_num = ut_crc32(buf, sizeof(now));
724723

storage/innobase/include/buf0checksum.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*****************************************************************************
22
3-
Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
3+
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
44
55
This program is free software; you can redistribute it and/or modify it under
66
the terms of the GNU General Public License as published by the Free Software
@@ -34,15 +34,19 @@ Created Aug 11, 2011 Vasil Dimov
3434

3535
#endif /* !UNIV_INNOCHECKSUM */
3636

37-
/********************************************************************//**
38-
Calculates a page CRC32 which is stored to the page when it is written
39-
to a file. Note that we must be careful to calculate the same value on
40-
32-bit and 64-bit architectures.
37+
/** Calculates the CRC32 checksum of a page. The value is stored to the page
38+
when it is written to a file and also checked for a match when reading from
39+
the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian
40+
variants. Note that we must be careful to calculate the same value on 32-bit
41+
and 64-bit architectures.
42+
@param[in] page buffer page (UNIV_PAGE_SIZE bytes)
43+
@param[in] use_legacy_big_endian if true then use big endian
44+
byteorder when converting byte strings to integers
4145
@return checksum */
42-
ib_uint32_t
46+
uint32_t
4347
buf_calc_page_crc32(
44-
/*================*/
45-
const byte* page); /*!< in: buffer page */
48+
const byte* page,
49+
bool use_legacy_big_endian = false);
4650

4751
/********************************************************************//**
4852
Calculates a page checksum which is stored to the page when it is written

storage/innobase/include/log0log.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -370,11 +370,22 @@ ulint
370370
log_block_calc_checksum_innodb(const byte* block);
371371

372372
/** Calculates the checksum for a log block using the CRC32 algorithm.
373-
@param[in] block the redo log block
374-
@return the calculated checksum value */
373+
@param[in] block log block
374+
@return checksum */
375+
UNIV_INLINE
376+
ulint
377+
log_block_calc_checksum_crc32(
378+
const byte* block);
379+
380+
/** Calculates the checksum for a log block using the CRC32 algorithm.
381+
This function uses big endian byteorder when converting byte strings to
382+
integers.
383+
@param[in] block log block
384+
@return checksum */
375385
UNIV_INLINE
376386
ulint
377-
log_block_calc_checksum_crc32(const byte* block);
387+
log_block_calc_checksum_crc32_legacy_big_endian(
388+
const byte* block);
378389

379390
/** Calculates the checksum for a log block using the "no-op" algorithm.
380391
@param[in] block the redo log block

storage/innobase/include/log0log.ic

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,28 @@ log_block_calc_checksum_innodb(
230230
}
231231

232232
/** Calculates the checksum for a log block using the CRC32 algorithm.
233-
@param[in] block log block
234-
@return checksum */
233+
@param[in] block log block
234+
@return checksum */
235235
UNIV_INLINE
236236
ulint
237237
log_block_calc_checksum_crc32(
238238
const byte* block)
239239
{
240-
return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE));
240+
return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE));
241+
}
242+
243+
/** Calculates the checksum for a log block using the CRC32 algorithm.
244+
This function uses big endian byteorder when converting byte strings to
245+
integers.
246+
@param[in] block log block
247+
@return checksum */
248+
UNIV_INLINE
249+
ulint
250+
log_block_calc_checksum_crc32_legacy_big_endian(
251+
const byte* block)
252+
{
253+
return(ut_crc32_legacy_big_endian(
254+
block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE));
241255
}
242256

243257
/** Calculates the checksum for a log block using the "no-op" algorithm.

storage/innobase/include/page0zip.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -481,16 +481,21 @@ page_zip_parse_compress(
481481
page_zip_des_t* page_zip); /*!< out: compressed page */
482482

483483
#endif /* !UNIV_INNOCHECKSUM */
484-
/**********************************************************************//**
485-
Calculate the compressed page checksum.
484+
485+
/** Calculate the compressed page checksum.
486+
@param[in] data compressed page
487+
@param[in] size size of compressed page
488+
@param[in] algo algorithm to use
489+
@param[in] use_legacy_big_endian only used if algo is
490+
SRV_CHECKSUM_ALGORITHM_CRC32 or SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 - if true
491+
then use big endian byteorder when converting byte strings to integers.
486492
@return page checksum */
487-
ib_uint32_t
493+
uint32_t
488494
page_zip_calc_checksum(
489-
/*===================*/
490-
const void* data, /*!< in: compressed page */
491-
ulint size, /*!< in: size of compressed page */
492-
srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
493-
__attribute__((nonnull));
495+
const void* data,
496+
ulint size,
497+
srv_checksum_algorithm_t algo,
498+
bool use_legacy_big_endian = false);
494499

495500
/**********************************************************************//**
496501
Verify a compressed page's checksum.

0 commit comments

Comments
 (0)