@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
429
429
430
430
switch (key -> both .offset & (FUT_OFF_INODE |FUT_OFF_MMSHARED )) {
431
431
case FUT_OFF_INODE :
432
- ihold ( key -> shared . inode ); /* implies smp_mb(); (B) */
432
+ smp_mb (); /* explicit smp_mb(); (B) */
433
433
break ;
434
434
case FUT_OFF_MMSHARED :
435
435
futex_get_mm (key ); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
463
463
464
464
switch (key -> both .offset & (FUT_OFF_INODE |FUT_OFF_MMSHARED )) {
465
465
case FUT_OFF_INODE :
466
- iput (key -> shared .inode );
467
466
break ;
468
467
case FUT_OFF_MMSHARED :
469
468
mmdrop (key -> private .mm );
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
505
504
return timeout ;
506
505
}
507
506
507
+ /*
508
+ * Generate a machine wide unique identifier for this inode.
509
+ *
510
+ * This relies on u64 not wrapping in the life-time of the machine; which with
511
+ * 1ns resolution means almost 585 years.
512
+ *
513
+ * This further relies on the fact that a well formed program will not unmap
514
+ * the file while it has a (shared) futex waiting on it. This mapping will have
515
+ * a file reference which pins the mount and inode.
516
+ *
517
+ * If for some reason an inode gets evicted and read back in again, it will get
518
+ * a new sequence number and will _NOT_ match, even though it is the exact same
519
+ * file.
520
+ *
521
+ * It is important that match_futex() will never have a false-positive, esp.
522
+ * for PI futexes that can mess up the state. The above argues that false-negatives
523
+ * are only possible for malformed programs.
524
+ */
525
+ static u64 get_inode_sequence_number (struct inode * inode )
526
+ {
527
+ static atomic64_t i_seq ;
528
+ u64 old ;
529
+
530
+ /* Does the inode already have a sequence number? */
531
+ old = atomic64_read (& inode -> i_sequence );
532
+ if (likely (old ))
533
+ return old ;
534
+
535
+ for (;;) {
536
+ u64 new = atomic64_add_return (1 , & i_seq );
537
+ if (WARN_ON_ONCE (!new ))
538
+ continue ;
539
+
540
+ old = atomic64_cmpxchg_relaxed (& inode -> i_sequence , 0 , new );
541
+ if (old )
542
+ return old ;
543
+ return new ;
544
+ }
545
+ }
546
+
508
547
/**
509
548
* get_futex_key() - Get parameters which are the keys for a futex
510
549
* @uaddr: virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
517
556
*
518
557
* The key words are stored in @key on success.
519
558
*
520
- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
521
- * offset_within_page). For private mappings, it's (uaddr, current->mm).
522
- * We can usually work out the index without swapping in the page.
559
+ * For shared mappings (when @fshared), the key is:
560
+ * ( inode->i_sequence, page->index, offset_within_page )
561
+ * [ also see get_inode_sequence_number() ]
562
+ *
563
+ * For private mappings (or when !@fshared), the key is:
564
+ * ( current->mm, address, 0 )
565
+ *
566
+ * This allows (cross process, where applicable) identification of the futex
567
+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
523
568
*
524
569
* lock_page() might sleep, the caller should not hold a spinlock.
525
570
*/
@@ -659,8 +704,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
659
704
key -> private .mm = mm ;
660
705
key -> private .address = address ;
661
706
662
- get_futex_key_refs (key ); /* implies smp_mb(); (B) */
663
-
664
707
} else {
665
708
struct inode * inode ;
666
709
@@ -692,40 +735,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
692
735
goto again ;
693
736
}
694
737
695
- /*
696
- * Take a reference unless it is about to be freed. Previously
697
- * this reference was taken by ihold under the page lock
698
- * pinning the inode in place so i_lock was unnecessary. The
699
- * only way for this check to fail is if the inode was
700
- * truncated in parallel which is almost certainly an
701
- * application bug. In such a case, just retry.
702
- *
703
- * We are not calling into get_futex_key_refs() in file-backed
704
- * cases, therefore a successful atomic_inc return below will
705
- * guarantee that get_futex_key() will still imply smp_mb(); (B).
706
- */
707
- if (!atomic_inc_not_zero (& inode -> i_count )) {
708
- rcu_read_unlock ();
709
- put_page (page );
710
-
711
- goto again ;
712
- }
713
-
714
- /* Should be impossible but lets be paranoid for now */
715
- if (WARN_ON_ONCE (inode -> i_mapping != mapping )) {
716
- err = - EFAULT ;
717
- rcu_read_unlock ();
718
- iput (inode );
719
-
720
- goto out ;
721
- }
722
-
723
738
key -> both .offset |= FUT_OFF_INODE ; /* inode-based key */
724
- key -> shared .inode = inode ;
739
+ key -> shared .i_seq = get_inode_sequence_number ( inode ) ;
725
740
key -> shared .pgoff = basepage_index (tail );
726
741
rcu_read_unlock ();
727
742
}
728
743
744
+ get_futex_key_refs (key ); /* implies smp_mb(); (B) */
745
+
729
746
out :
730
747
put_page (page );
731
748
return err ;
0 commit comments