@@ -288,7 +288,162 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
288
288
return need_transaction ;
289
289
}
290
290
291
- static int al_write_transaction (struct drbd_device * device );
291
+ #if (PAGE_SHIFT + 3 ) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT )
292
+ /* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
293
+ * are still coupled, or assume too much about their relation.
294
+ * Code below will not work if this is violated.
295
+ * Will be cleaned up with some followup patch.
296
+ */
297
+ # error FIXME
298
+ #endif
299
+
300
+ static unsigned int al_extent_to_bm_page (unsigned int al_enr )
301
+ {
302
+ return al_enr >>
303
+ /* bit to page */
304
+ ((PAGE_SHIFT + 3 ) -
305
+ /* al extent number to bit */
306
+ (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT ));
307
+ }
308
+
309
+ static sector_t al_tr_number_to_on_disk_sector (struct drbd_device * device )
310
+ {
311
+ const unsigned int stripes = device -> ldev -> md .al_stripes ;
312
+ const unsigned int stripe_size_4kB = device -> ldev -> md .al_stripe_size_4k ;
313
+
314
+ /* transaction number, modulo on-disk ring buffer wrap around */
315
+ unsigned int t = device -> al_tr_number % (device -> ldev -> md .al_size_4k );
316
+
317
+ /* ... to aligned 4k on disk block */
318
+ t = ((t % stripes ) * stripe_size_4kB ) + t /stripes ;
319
+
320
+ /* ... to 512 byte sector in activity log */
321
+ t *= 8 ;
322
+
323
+ /* ... plus offset to the on disk position */
324
+ return device -> ldev -> md .md_offset + device -> ldev -> md .al_offset + t ;
325
+ }
326
+
327
+ static int __al_write_transaction (struct drbd_device * device , struct al_transaction_on_disk * buffer )
328
+ {
329
+ struct lc_element * e ;
330
+ sector_t sector ;
331
+ int i , mx ;
332
+ unsigned extent_nr ;
333
+ unsigned crc = 0 ;
334
+ int err = 0 ;
335
+
336
+ memset (buffer , 0 , sizeof (* buffer ));
337
+ buffer -> magic = cpu_to_be32 (DRBD_AL_MAGIC );
338
+ buffer -> tr_number = cpu_to_be32 (device -> al_tr_number );
339
+
340
+ i = 0 ;
341
+
342
+ /* Even though no one can start to change this list
343
+ * once we set the LC_LOCKED -- from drbd_al_begin_io(),
344
+ * lc_try_lock_for_transaction() --, someone may still
345
+ * be in the process of changing it. */
346
+ spin_lock_irq (& device -> al_lock );
347
+ list_for_each_entry (e , & device -> act_log -> to_be_changed , list ) {
348
+ if (i == AL_UPDATES_PER_TRANSACTION ) {
349
+ i ++ ;
350
+ break ;
351
+ }
352
+ buffer -> update_slot_nr [i ] = cpu_to_be16 (e -> lc_index );
353
+ buffer -> update_extent_nr [i ] = cpu_to_be32 (e -> lc_new_number );
354
+ if (e -> lc_number != LC_FREE )
355
+ drbd_bm_mark_for_writeout (device ,
356
+ al_extent_to_bm_page (e -> lc_number ));
357
+ i ++ ;
358
+ }
359
+ spin_unlock_irq (& device -> al_lock );
360
+ BUG_ON (i > AL_UPDATES_PER_TRANSACTION );
361
+
362
+ buffer -> n_updates = cpu_to_be16 (i );
363
+ for ( ; i < AL_UPDATES_PER_TRANSACTION ; i ++ ) {
364
+ buffer -> update_slot_nr [i ] = cpu_to_be16 (-1 );
365
+ buffer -> update_extent_nr [i ] = cpu_to_be32 (LC_FREE );
366
+ }
367
+
368
+ buffer -> context_size = cpu_to_be16 (device -> act_log -> nr_elements );
369
+ buffer -> context_start_slot_nr = cpu_to_be16 (device -> al_tr_cycle );
370
+
371
+ mx = min_t (int , AL_CONTEXT_PER_TRANSACTION ,
372
+ device -> act_log -> nr_elements - device -> al_tr_cycle );
373
+ for (i = 0 ; i < mx ; i ++ ) {
374
+ unsigned idx = device -> al_tr_cycle + i ;
375
+ extent_nr = lc_element_by_index (device -> act_log , idx )-> lc_number ;
376
+ buffer -> context [i ] = cpu_to_be32 (extent_nr );
377
+ }
378
+ for (; i < AL_CONTEXT_PER_TRANSACTION ; i ++ )
379
+ buffer -> context [i ] = cpu_to_be32 (LC_FREE );
380
+
381
+ device -> al_tr_cycle += AL_CONTEXT_PER_TRANSACTION ;
382
+ if (device -> al_tr_cycle >= device -> act_log -> nr_elements )
383
+ device -> al_tr_cycle = 0 ;
384
+
385
+ sector = al_tr_number_to_on_disk_sector (device );
386
+
387
+ crc = crc32c (0 , buffer , 4096 );
388
+ buffer -> crc32c = cpu_to_be32 (crc );
389
+
390
+ if (drbd_bm_write_hinted (device ))
391
+ err = - EIO ;
392
+ else {
393
+ bool write_al_updates ;
394
+ rcu_read_lock ();
395
+ write_al_updates = rcu_dereference (device -> ldev -> disk_conf )-> al_updates ;
396
+ rcu_read_unlock ();
397
+ if (write_al_updates ) {
398
+ if (drbd_md_sync_page_io (device , device -> ldev , sector , WRITE )) {
399
+ err = - EIO ;
400
+ drbd_chk_io_error (device , 1 , DRBD_META_IO_ERROR );
401
+ } else {
402
+ device -> al_tr_number ++ ;
403
+ device -> al_writ_cnt ++ ;
404
+ }
405
+ }
406
+ }
407
+
408
+ return err ;
409
+ }
410
+
411
+ static int al_write_transaction (struct drbd_device * device )
412
+ {
413
+ struct al_transaction_on_disk * buffer ;
414
+ int err ;
415
+
416
+ if (!get_ldev (device )) {
417
+ drbd_err (device , "disk is %s, cannot start al transaction\n" ,
418
+ drbd_disk_str (device -> state .disk ));
419
+ return - EIO ;
420
+ }
421
+
422
+ /* The bitmap write may have failed, causing a state change. */
423
+ if (device -> state .disk < D_INCONSISTENT ) {
424
+ drbd_err (device ,
425
+ "disk is %s, cannot write al transaction\n" ,
426
+ drbd_disk_str (device -> state .disk ));
427
+ put_ldev (device );
428
+ return - EIO ;
429
+ }
430
+
431
+ /* protects md_io_buffer, al_tr_cycle, ... */
432
+ buffer = drbd_md_get_buffer (device , __func__ );
433
+ if (!buffer ) {
434
+ drbd_err (device , "disk failed while waiting for md_io buffer\n" );
435
+ put_ldev (device );
436
+ return - ENODEV ;
437
+ }
438
+
439
+ err = __al_write_transaction (device , buffer );
440
+
441
+ drbd_md_put_buffer (device );
442
+ put_ldev (device );
443
+
444
+ return err ;
445
+ }
446
+
292
447
293
448
void drbd_al_begin_io_commit (struct drbd_device * device )
294
449
{
@@ -420,153 +575,6 @@ void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i)
420
575
wake_up (& device -> al_wait );
421
576
}
422
577
423
- #if (PAGE_SHIFT + 3 ) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT )
424
- /* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
425
- * are still coupled, or assume too much about their relation.
426
- * Code below will not work if this is violated.
427
- * Will be cleaned up with some followup patch.
428
- */
429
- # error FIXME
430
- #endif
431
-
432
- static unsigned int al_extent_to_bm_page (unsigned int al_enr )
433
- {
434
- return al_enr >>
435
- /* bit to page */
436
- ((PAGE_SHIFT + 3 ) -
437
- /* al extent number to bit */
438
- (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT ));
439
- }
440
-
441
- static sector_t al_tr_number_to_on_disk_sector (struct drbd_device * device )
442
- {
443
- const unsigned int stripes = device -> ldev -> md .al_stripes ;
444
- const unsigned int stripe_size_4kB = device -> ldev -> md .al_stripe_size_4k ;
445
-
446
- /* transaction number, modulo on-disk ring buffer wrap around */
447
- unsigned int t = device -> al_tr_number % (device -> ldev -> md .al_size_4k );
448
-
449
- /* ... to aligned 4k on disk block */
450
- t = ((t % stripes ) * stripe_size_4kB ) + t /stripes ;
451
-
452
- /* ... to 512 byte sector in activity log */
453
- t *= 8 ;
454
-
455
- /* ... plus offset to the on disk position */
456
- return device -> ldev -> md .md_offset + device -> ldev -> md .al_offset + t ;
457
- }
458
-
459
- int al_write_transaction (struct drbd_device * device )
460
- {
461
- struct al_transaction_on_disk * buffer ;
462
- struct lc_element * e ;
463
- sector_t sector ;
464
- int i , mx ;
465
- unsigned extent_nr ;
466
- unsigned crc = 0 ;
467
- int err = 0 ;
468
-
469
- if (!get_ldev (device )) {
470
- drbd_err (device , "disk is %s, cannot start al transaction\n" ,
471
- drbd_disk_str (device -> state .disk ));
472
- return - EIO ;
473
- }
474
-
475
- /* The bitmap write may have failed, causing a state change. */
476
- if (device -> state .disk < D_INCONSISTENT ) {
477
- drbd_err (device ,
478
- "disk is %s, cannot write al transaction\n" ,
479
- drbd_disk_str (device -> state .disk ));
480
- put_ldev (device );
481
- return - EIO ;
482
- }
483
-
484
- /* protects md_io_buffer, al_tr_cycle, ... */
485
- buffer = drbd_md_get_buffer (device , __func__ );
486
- if (!buffer ) {
487
- drbd_err (device , "disk failed while waiting for md_io buffer\n" );
488
- put_ldev (device );
489
- return - ENODEV ;
490
- }
491
-
492
- memset (buffer , 0 , sizeof (* buffer ));
493
- buffer -> magic = cpu_to_be32 (DRBD_AL_MAGIC );
494
- buffer -> tr_number = cpu_to_be32 (device -> al_tr_number );
495
-
496
- i = 0 ;
497
-
498
- /* Even though no one can start to change this list
499
- * once we set the LC_LOCKED -- from drbd_al_begin_io(),
500
- * lc_try_lock_for_transaction() --, someone may still
501
- * be in the process of changing it. */
502
- spin_lock_irq (& device -> al_lock );
503
- list_for_each_entry (e , & device -> act_log -> to_be_changed , list ) {
504
- if (i == AL_UPDATES_PER_TRANSACTION ) {
505
- i ++ ;
506
- break ;
507
- }
508
- buffer -> update_slot_nr [i ] = cpu_to_be16 (e -> lc_index );
509
- buffer -> update_extent_nr [i ] = cpu_to_be32 (e -> lc_new_number );
510
- if (e -> lc_number != LC_FREE )
511
- drbd_bm_mark_for_writeout (device ,
512
- al_extent_to_bm_page (e -> lc_number ));
513
- i ++ ;
514
- }
515
- spin_unlock_irq (& device -> al_lock );
516
- BUG_ON (i > AL_UPDATES_PER_TRANSACTION );
517
-
518
- buffer -> n_updates = cpu_to_be16 (i );
519
- for ( ; i < AL_UPDATES_PER_TRANSACTION ; i ++ ) {
520
- buffer -> update_slot_nr [i ] = cpu_to_be16 (-1 );
521
- buffer -> update_extent_nr [i ] = cpu_to_be32 (LC_FREE );
522
- }
523
-
524
- buffer -> context_size = cpu_to_be16 (device -> act_log -> nr_elements );
525
- buffer -> context_start_slot_nr = cpu_to_be16 (device -> al_tr_cycle );
526
-
527
- mx = min_t (int , AL_CONTEXT_PER_TRANSACTION ,
528
- device -> act_log -> nr_elements - device -> al_tr_cycle );
529
- for (i = 0 ; i < mx ; i ++ ) {
530
- unsigned idx = device -> al_tr_cycle + i ;
531
- extent_nr = lc_element_by_index (device -> act_log , idx )-> lc_number ;
532
- buffer -> context [i ] = cpu_to_be32 (extent_nr );
533
- }
534
- for (; i < AL_CONTEXT_PER_TRANSACTION ; i ++ )
535
- buffer -> context [i ] = cpu_to_be32 (LC_FREE );
536
-
537
- device -> al_tr_cycle += AL_CONTEXT_PER_TRANSACTION ;
538
- if (device -> al_tr_cycle >= device -> act_log -> nr_elements )
539
- device -> al_tr_cycle = 0 ;
540
-
541
- sector = al_tr_number_to_on_disk_sector (device );
542
-
543
- crc = crc32c (0 , buffer , 4096 );
544
- buffer -> crc32c = cpu_to_be32 (crc );
545
-
546
- if (drbd_bm_write_hinted (device ))
547
- err = - EIO ;
548
- else {
549
- bool write_al_updates ;
550
- rcu_read_lock ();
551
- write_al_updates = rcu_dereference (device -> ldev -> disk_conf )-> al_updates ;
552
- rcu_read_unlock ();
553
- if (write_al_updates ) {
554
- if (drbd_md_sync_page_io (device , device -> ldev , sector , WRITE )) {
555
- err = - EIO ;
556
- drbd_chk_io_error (device , 1 , DRBD_META_IO_ERROR );
557
- } else {
558
- device -> al_tr_number ++ ;
559
- device -> al_writ_cnt ++ ;
560
- }
561
- }
562
- }
563
-
564
- drbd_md_put_buffer (device );
565
- put_ldev (device );
566
-
567
- return err ;
568
- }
569
-
570
578
static int _try_lc_del (struct drbd_device * device , struct lc_element * al_ext )
571
579
{
572
580
int rv ;
@@ -606,21 +614,24 @@ void drbd_al_shrink(struct drbd_device *device)
606
614
wake_up (& device -> al_wait );
607
615
}
608
616
609
- int drbd_initialize_al (struct drbd_device * device , void * buffer )
617
+ int drbd_al_initialize (struct drbd_device * device , void * buffer )
610
618
{
611
619
struct al_transaction_on_disk * al = buffer ;
612
620
struct drbd_md * md = & device -> ldev -> md ;
613
- sector_t al_base = md -> md_offset + md -> al_offset ;
614
621
int al_size_4k = md -> al_stripes * md -> al_stripe_size_4k ;
615
622
int i ;
616
623
617
- memset (al , 0 , 4096 );
618
- al -> magic = cpu_to_be32 (DRBD_AL_MAGIC );
619
- al -> transaction_type = cpu_to_be16 (AL_TR_INITIALIZED );
620
- al -> crc32c = cpu_to_be32 (crc32c (0 , al , 4096 ));
624
+ __al_write_transaction (device , al );
625
+ /* There may or may not have been a pending transaction. */
626
+ spin_lock_irq (& device -> al_lock );
627
+ lc_committed (device -> act_log );
628
+ spin_unlock_irq (& device -> al_lock );
621
629
622
- for (i = 0 ; i < al_size_4k ; i ++ ) {
623
- int err = drbd_md_sync_page_io (device , device -> ldev , al_base + i * 8 , WRITE );
630
+ /* The rest of the transactions will have an empty "updates" list, and
631
+ * are written out only to provide the context, and to initialize the
632
+ * on-disk ring buffer. */
633
+ for (i = 1 ; i < al_size_4k ; i ++ ) {
634
+ int err = __al_write_transaction (device , al );
624
635
if (err )
625
636
return err ;
626
637
}
0 commit comments