Skip to content

Commit 5b1fe7b

Browse files
idryomov authored and snitm committed
dm cache metadata: set dirty on all cache blocks after a crash
Quoting Documentation/device-mapper/cache.txt: The 'dirty' state for a cache block changes far too frequently for us to keep updating it on the fly. So we treat it as a hint. In normal operation it will be written when the dm device is suspended. If the system crashes all cache blocks will be assumed dirty when restarted. This got broken in commit f177940 ("dm cache metadata: switch to using the new cursor api for loading metadata") in 4.9, which removed the code that consulted cmd->clean_when_opened (CLEAN_SHUTDOWN on-disk flag) when loading cache blocks. This results in data corruption on an unclean shutdown with dirty cache blocks on the fast device. After the crash those blocks are considered clean and may get evicted from the cache at any time. This can be demonstrated by doing a lot of reads to trigger individual evictions, but uncache is more predictable: ### Disable auto-activation in lvm.conf to be able to do uncache in ### time (i.e. see uncache doing flushing) when the fix is applied. # xfs_io -d -c 'pwrite -b 4M -S 0xaa 0 1G' /dev/vdb # vgcreate vg_cache /dev/vdb /dev/vdc # lvcreate -L 1G -n lv_slowdev vg_cache /dev/vdb # lvcreate -L 512M -n lv_cachedev vg_cache /dev/vdc # lvcreate -L 256M -n lv_metadev vg_cache /dev/vdc # lvconvert --type cache-pool --cachemode writeback vg_cache/lv_cachedev --poolmetadata vg_cache/lv_metadev # lvconvert --type cache vg_cache/lv_slowdev --cachepool vg_cache/lv_cachedev # xfs_io -d -c 'pwrite -b 4M -S 0xbb 0 512M' /dev/mapper/vg_cache-lv_slowdev # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 0fe00010: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 
# dmsetup status vg_cache-lv_slowdev 0 2097152 cache 8 27/65536 128 8192/8192 1 100 0 0 0 8192 7065 2 metadata2 writeback 2 migration_threshold 2048 smq 0 rw - ^^^^ 7065 * 64k = 441M yet to be written to the slow device # echo b >/proc/sysrq-trigger # vgchange -ay vg_cache # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 0fe00010: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ # lvconvert --uncache vg_cache/lv_slowdev Flushing 0 blocks for cache vg_cache/lv_slowdev. Logical volume "lv_cachedev" successfully removed Logical volume vg_cache/lv_slowdev is not cached. # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa ................ 0fe00010: aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa ................ This is the case with both v1 and v2 cache pool metadata formats. After applying this patch: # vgchange -ay vg_cache # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 0fe00010: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ # lvconvert --uncache vg_cache/lv_slowdev Flushing 3724 blocks for cache vg_cache/lv_slowdev. ... Flushing 71 blocks for cache vg_cache/lv_slowdev. Logical volume "lv_cachedev" successfully removed Logical volume vg_cache/lv_slowdev is not cached. # xfs_io -d -c 'pread -v 254M 512' /dev/mapper/vg_cache-lv_slowdev | head -n 2 0fe00000: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ 0fe00010: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ Cc: [email protected] Fixes: f177940 ("dm cache metadata: switch to using the new cursor api for loading metadata") Signed-off-by: Ilya Dryomov <[email protected]> Signed-off-by: Mike Snitzer <[email protected]>
1 parent c9a5e6a commit 5b1fe7b

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

drivers/md/dm-cache-metadata.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,6 +1323,7 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd,
13231323

13241324
dm_oblock_t oblock;
13251325
unsigned flags;
1326+
bool dirty = true;
13261327

13271328
dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
13281329
memcpy(&mapping, mapping_value_le, sizeof(mapping));
@@ -1333,8 +1334,10 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd,
13331334
dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
13341335
memcpy(&hint, hint_value_le, sizeof(hint));
13351336
}
1337+
if (cmd->clean_when_opened)
1338+
dirty = flags & M_DIRTY;
13361339

1337-
r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY,
1340+
r = fn(context, oblock, to_cblock(cb), dirty,
13381341
le32_to_cpu(hint), hints_valid);
13391342
if (r) {
13401343
DMERR("policy couldn't load cache block %llu",
@@ -1362,7 +1365,7 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd,
13621365

13631366
dm_oblock_t oblock;
13641367
unsigned flags;
1365-
bool dirty;
1368+
bool dirty = true;
13661369

13671370
dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
13681371
memcpy(&mapping, mapping_value_le, sizeof(mapping));
@@ -1373,8 +1376,9 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd,
13731376
dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
13741377
memcpy(&hint, hint_value_le, sizeof(hint));
13751378
}
1379+
if (cmd->clean_when_opened)
1380+
dirty = dm_bitset_cursor_get_value(dirty_cursor);
13761381

1377-
dirty = dm_bitset_cursor_get_value(dirty_cursor);
13781382
r = fn(context, oblock, to_cblock(cb), dirty,
13791383
le32_to_cpu(hint), hints_valid);
13801384
if (r) {

0 commit comments

Comments
 (0)