@@ -22,6 +22,7 @@
 #include "volumes.h"
 #include "disk-io.h"
 #include "ordered-data.h"
+#include "transaction.h"
 #include "backref.h"
 
 /*
@@ -89,6 +90,7 @@ struct scrub_dev {
 	int			first_free;
 	int			curr;
 	atomic_t		in_flight;
+	atomic_t		fixup_cnt;
 	spinlock_t		list_lock;
 	wait_queue_head_t	list_wait;
 	u16			csum_size;
@@ -102,6 +104,14 @@ struct scrub_dev {
 	spinlock_t		stat_lock;
 };
 
+struct scrub_fixup_nodatasum {
+	struct scrub_dev	*sdev;
+	u64			logical;
+	struct btrfs_root	*root;
+	struct btrfs_work	work;
+	int			mirror_num;
+};
+
 struct scrub_warning {
 	struct btrfs_path	*path;
 	u64			extent_item_size;
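
The new scrub_fixup_nodatasum context embeds its struct btrfs_work rather than pointing at one, so the deferred worker can recover the whole context from the work pointer alone via container_of(). Below is a minimal userspace sketch of that embed-and-recover pattern; struct work_item, struct fixup_ctx and fixup_worker are illustrative stand-ins, not btrfs code.

#include <stddef.h>
#include <stdio.h>

/* stand-in for struct btrfs_work: only the callback pointer matters here */
struct work_item {
	void (*func)(struct work_item *work);
};

/* stand-in for struct scrub_fixup_nodatasum: the work item is embedded */
struct fixup_ctx {
	unsigned long long logical;
	int mirror_num;
	struct work_item work;
};

/* recover the enclosing context from the embedded member, as the worker does */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void fixup_worker(struct work_item *work)
{
	struct fixup_ctx *ctx = container_of(work, struct fixup_ctx, work);

	printf("fixup for logical %llu, mirror %d\n",
	       ctx->logical, ctx->mirror_num);
}

int main(void)
{
	struct fixup_ctx ctx = { .logical = 1058816, .mirror_num = 2 };

	ctx.work.func = fixup_worker;
	ctx.work.func(&ctx.work);	/* a real workqueue would invoke this later */
	return 0;
}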
@@ -190,12 +200,13 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
 
 		if (i != SCRUB_BIOS_PER_DEV - 1)
 			sdev->bios[i]->next_free = i + 1;
-		 else
+		else
 			sdev->bios[i]->next_free = -1;
 	}
 	sdev->first_free = 0;
 	sdev->curr = -1;
 	atomic_set(&sdev->in_flight, 0);
+	atomic_set(&sdev->fixup_cnt, 0);
 	atomic_set(&sdev->cancel_req, 0);
 	sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
 	INIT_LIST_HEAD(&sdev->csum_list);
@@ -347,6 +358,151 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
 	kfree(swarn.msg_buf);
 }
 
+static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx)
+{
+	struct page *page;
+	unsigned long index;
+	struct scrub_fixup_nodatasum *fixup = ctx;
+	int ret;
+	int corrected;
+	struct btrfs_key key;
+	struct inode *inode;
+	u64 end = offset + PAGE_SIZE - 1;
+	struct btrfs_root *local_root;
+
+	key.objectid = root;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
+	if (IS_ERR(local_root))
+		return PTR_ERR(local_root);
+
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.objectid = inum;
+	key.offset = 0;
+	inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	ret = set_extent_bit(&BTRFS_I(inode)->io_tree, offset, end,
+			     EXTENT_DAMAGED, 0, NULL, NULL, GFP_NOFS);
+
+	/* set_extent_bit should either succeed or give proper error */
+	WARN_ON(ret > 0);
+	if (ret)
+		return ret < 0 ? ret : -EFAULT;
+
+	index = offset >> PAGE_CACHE_SHIFT;
+
+	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+	if (!page)
+		return -ENOMEM;
+
+	ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
+				    btrfs_get_extent, fixup->mirror_num);
+	wait_on_page_locked(page);
+	corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, end,
+				    EXTENT_DAMAGED, 0, NULL);
+
+	if (corrected)
+		WARN_ON(!PageUptodate(page));
+	else
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, end,
+				 EXTENT_DAMAGED, 0, 0, NULL, GFP_NOFS);
+
+	put_page(page);
+	iput(inode);
+
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0 && corrected) {
+		/*
+		 * we only need to call readpage for one of the inodes belonging
+		 * to this extent. so make iterate_extent_inodes stop
+		 */
+		return 1;
+	}
+
+	return -EIO;
+}
+
+static void scrub_fixup_nodatasum(struct btrfs_work *work)
+{
+	int ret;
+	struct scrub_fixup_nodatasum *fixup;
+	struct scrub_dev *sdev;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_path *path;
+	int uncorrectable = 0;
+
+	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
+	sdev = fixup->sdev;
+	fs_info = fixup->root->fs_info;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		spin_lock(&sdev->stat_lock);
+		++sdev->stat.malloc_errors;
+		spin_unlock(&sdev->stat_lock);
+		uncorrectable = 1;
+		goto out;
+	}
+
+	trans = btrfs_join_transaction(fixup->root);
+	if (IS_ERR(trans)) {
+		uncorrectable = 1;
+		goto out;
+	}
+
+	/*
+	 * the idea is to trigger a regular read through the standard path. we
+	 * read a page from the (failed) logical address by specifying the
+	 * corresponding copynum of the failed sector. thus, that readpage is
+	 * expected to fail.
+	 * that is the point where on-the-fly error correction will kick in
+	 * (once it's finished) and rewrite the failed sector if a good copy
+	 * can be found.
+	 */
+	ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
+					  path, scrub_fixup_readpage,
+					  fixup);
+	if (ret < 0) {
+		uncorrectable = 1;
+		goto out;
+	}
+	WARN_ON(ret != 1);
+
+	spin_lock(&sdev->stat_lock);
+	++sdev->stat.corrected_errors;
+	spin_unlock(&sdev->stat_lock);
+
+out:
+	if (trans && !IS_ERR(trans))
+		btrfs_end_transaction(trans, fixup->root);
+	if (uncorrectable) {
+		spin_lock(&sdev->stat_lock);
+		++sdev->stat.uncorrectable_errors;
+		spin_unlock(&sdev->stat_lock);
+		printk_ratelimited(KERN_ERR "btrfs: unable to fixup "
+				   "(nodatasum) error at logical %llu\n",
+				   fixup->logical);
+	}
+
+	btrfs_free_path(path);
+	kfree(fixup);
+
+	/* see caller why we're pretending to be paused in the scrub counters */
+	mutex_lock(&fs_info->scrub_lock);
+	atomic_dec(&fs_info->scrubs_running);
+	atomic_dec(&fs_info->scrubs_paused);
+	mutex_unlock(&fs_info->scrub_lock);
+	atomic_dec(&sdev->fixup_cnt);
+	wake_up(&fs_info->scrub_pause_wait);
+	wake_up(&sdev->list_wait);
+}
+
 /*
  * scrub_recheck_error gets called when either verification of the page
  * failed or the bio failed to read, e.g. with EIO. In the latter case,
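
scrub_fixup_readpage() uses the EXTENT_DAMAGED bit as a handshake with the read path: it marks the range damaged, rereads the page while forcing the failed copy's mirror_num, and afterwards treats "the bit is gone" as proof that the on-the-fly corrector rewrote the bad sector. Here is a toy userspace sketch of that set / reread / test sequence; FLAG_DAMAGED, reread_and_correct() and fixup_range() are made-up stand-ins for the extent-io machinery, not btrfs APIs.

#include <stdbool.h>
#include <stdio.h>

#define FLAG_DAMAGED 0x1u

/* toy range state standing in for the inode's extent-io tree bits */
struct range_state {
	unsigned flags;
};

/*
 * models the forced readpage: the read of the bad copy fails, and the
 * on-the-fly corrector clears the damaged flag only if a good copy
 * exists and could be written back over the bad one
 */
static void reread_and_correct(struct range_state *st, bool good_copy_exists)
{
	if (good_copy_exists)
		st->flags &= ~FLAG_DAMAGED;
}

/* returns 1 when corrected (stop iterating inodes), an -EIO-style -5 otherwise */
static int fixup_range(struct range_state *st, bool good_copy_exists)
{
	st->flags |= FLAG_DAMAGED;		/* set_extent_bit(EXTENT_DAMAGED) */
	reread_and_correct(st, good_copy_exists);
	if (!(st->flags & FLAG_DAMAGED))	/* !test_range_bit(EXTENT_DAMAGED) */
		return 1;
	st->flags &= ~FLAG_DAMAGED;		/* clear_extent_bit() on failure */
	return -5;
}

int main(void)
{
	struct range_state st = { 0 };

	printf("with a good copy:    %d\n", fixup_range(&st, true));
	printf("without a good copy: %d\n", fixup_range(&st, false));
	return 0;
}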
@@ -417,6 +573,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
 	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct btrfs_multi_bio *multi = NULL;
+	struct scrub_fixup_nodatasum *fixup;
 	u64 logical = sbio->logical + ix * PAGE_SIZE;
 	u64 length;
 	int i;
@@ -425,12 +582,30 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
 
 	if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
 	    (sbio->spag[ix].have_csum == 0)) {
+		fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
+		if (!fixup)
+			goto uncorrectable;
+		fixup->sdev = sdev;
+		fixup->logical = logical;
+		fixup->root = fs_info->extent_root;
+		fixup->mirror_num = sbio->spag[ix].mirror_num;
 		/*
-		 * nodatasum, don't try to fix anything
-		 * FIXME: we can do better, open the inode and trigger a
-		 * writeback
+		 * increment scrubs_running to prevent cancel requests from
+		 * completing as long as a fixup worker is running. we must also
+		 * increment scrubs_paused to prevent deadlocking on pause
+		 * requests used for transaction commits (as the worker uses a
+		 * transaction context). it is safe to regard the fixup worker
+		 * as paused for all practical matters. effectively, we only
+		 * avoid cancellation requests from completing.
 		 */
-		goto uncorrectable;
+		mutex_lock(&fs_info->scrub_lock);
+		atomic_inc(&fs_info->scrubs_running);
+		atomic_inc(&fs_info->scrubs_paused);
+		mutex_unlock(&fs_info->scrub_lock);
+		atomic_inc(&sdev->fixup_cnt);
+		fixup->work.func = scrub_fixup_nodatasum;
+		btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
+		return;
 	}
 
 	length = PAGE_SIZE;
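
Before handing the work item to the scrub workers, scrub_fixup() takes an extra reference on scrubs_running (so cancel requests cannot complete while fixups are pending), on scrubs_paused (so transaction-commit pause requests never wait on the worker, which itself joins a transaction), and on the per-device fixup_cnt that btrfs_scrub_dev() later waits on. The following is a compressed userspace sketch of that inc/dec pairing using C11 atomics; the names mirror the kernel counters, but the code is illustrative only and omits the scrub_lock and wait-queue wakeups noted in the comments.

#include <stdatomic.h>
#include <stdio.h>

/* toy counters mirroring fs_info->scrubs_running/scrubs_paused and
 * sdev->fixup_cnt; the real code takes scrub_lock around the first two */
static atomic_int scrubs_running, scrubs_paused, fixup_cnt;

static void queue_fixup(void)
{
	/* hold off cancellation, and count ourselves as "paused" so that
	 * transaction-commit pause requests do not deadlock on us */
	atomic_fetch_add(&scrubs_running, 1);
	atomic_fetch_add(&scrubs_paused, 1);
	atomic_fetch_add(&fixup_cnt, 1);
}

static void fixup_done(void)
{
	atomic_fetch_sub(&scrubs_running, 1);
	atomic_fetch_sub(&scrubs_paused, 1);
	atomic_fetch_sub(&fixup_cnt, 1);
	/* wake_up(scrub_pause_wait) and wake_up(list_wait) would follow here */
}

int main(void)
{
	queue_fixup();
	printf("running=%d paused=%d fixups=%d\n",
	       atomic_load(&scrubs_running), atomic_load(&scrubs_paused),
	       atomic_load(&fixup_cnt));
	fixup_done();
	printf("running=%d paused=%d fixups=%d\n",
	       atomic_load(&scrubs_running), atomic_load(&scrubs_paused),
	       atomic_load(&fixup_cnt));
	return 0;
}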
@@ -1425,10 +1600,11 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
 	ret = scrub_enumerate_chunks(sdev, start, end);
 
 	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
-
 	atomic_dec(&fs_info->scrubs_running);
 	wake_up(&fs_info->scrub_pause_wait);
 
+	wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0);
+
 	if (progress)
 		memcpy(progress, &sdev->stat, sizeof(*progress));
 