@@ -113,9 +113,9 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
  * The caller must ensure selem->smap is still valid to be
  * dereferenced for its smap->elem_size and smap->cache_idx.
  */
-bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
-				     struct bpf_local_storage_elem *selem,
-				     bool uncharge_mem, bool use_trace_rcu)
+static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
+					    struct bpf_local_storage_elem *selem,
+					    bool uncharge_mem, bool use_trace_rcu)
 {
 	struct bpf_local_storage_map *smap;
 	bool free_local_storage;
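Making this helper static is safe because all remaining callers live in this file. For context, here is a minimal sketch of how the locked wrapper drives it, modeled on bpf_selem_unlink_storage() in the same file; the sketch name is hypothetical, the selem_linked_to_storage() re-check is elided, and the real helper also handles the use_trace_rcu case via call_rcu_tasks_trace():

	/* Sketch: take the owner's storage lock, delegate to the nolock
	 * helper, and free the whole bpf_local_storage if it reported
	 * that the last selem is gone.
	 */
	static void selem_unlink_storage_sketch(struct bpf_local_storage_elem *selem)
	{
		struct bpf_local_storage *local_storage;
		bool free_local_storage;
		unsigned long flags;

		local_storage = rcu_dereference_check(selem->local_storage,
						      bpf_rcu_lock_held());
		raw_spin_lock_irqsave(&local_storage->lock, flags);
		free_local_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, true /* uncharge_mem */,
			false /* use_trace_rcu */);
		raw_spin_unlock_irqrestore(&local_storage->lock, flags);

		if (free_local_storage)
			kfree_rcu(local_storage, rcu);
	}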
@@ -501,7 +501,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	return ERR_PTR(err);
 }
 
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
 {
 	u64 min_usage = U64_MAX;
 	u16 i, res = 0;
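The cache index allocator operates on a small per-storage-flavor table: under idx_lock it scans for the slot with the lowest usage count, claims it, and bumps the count. For reference, this is the structure it walks (matching struct bpf_local_storage_cache in include/linux/bpf_local_storage.h):

	/* One cache per storage flavor (sk, task, inode, ...); each map of
	 * that flavor claims the currently least-used slot so that hot maps
	 * spread across the per-owner lookup cache.
	 */
	struct bpf_local_storage_cache {
		spinlock_t idx_lock;
		u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
	};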
@@ -525,76 +525,14 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
 	return res;
 }
 
-void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx)
+static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
+					     u16 idx)
 {
 	spin_lock(&cache->idx_lock);
 	cache->idx_usage_counts[idx]--;
 	spin_unlock(&cache->idx_lock);
 }
 
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
-				int __percpu *busy_counter)
-{
-	struct bpf_local_storage_elem *selem;
-	struct bpf_local_storage_map_bucket *b;
-	unsigned int i;
-
-	/* Note that this map might be concurrently cloned from
-	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
-	 * RCU read section to finish before proceeding. New RCU
-	 * read sections should be prevented via bpf_map_inc_not_zero.
-	 */
-	synchronize_rcu();
-
-	/* bpf prog and the userspace can no longer access this map
-	 * now. No new selem (of this map) can be added
-	 * to the owner->storage or to the map bucket's list.
-	 *
-	 * The elem of this map can be cleaned up here
-	 * or when the storage is freed e.g.
-	 * by bpf_sk_storage_free() during __sk_destruct().
-	 */
-	for (i = 0; i < (1U << smap->bucket_log); i++) {
-		b = &smap->buckets[i];
-
-		rcu_read_lock();
-		/* No one is adding to b->list now */
-		while ((selem = hlist_entry_safe(
-				rcu_dereference_raw(hlist_first_rcu(&b->list)),
-				struct bpf_local_storage_elem, map_node))) {
-			if (busy_counter) {
-				migrate_disable();
-				this_cpu_inc(*busy_counter);
-			}
-			bpf_selem_unlink(selem, false);
-			if (busy_counter) {
-				this_cpu_dec(*busy_counter);
-				migrate_enable();
-			}
-			cond_resched_rcu();
-		}
-		rcu_read_unlock();
-	}
-
-	/* While freeing the storage we may still need to access the map.
-	 *
-	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
-	 * which then made the above while((selem = ...)) loop
-	 * exit immediately.
-	 *
-	 * However, while freeing the storage one still needs to access the
-	 * smap->elem_size to do the uncharging in
-	 * bpf_selem_unlink_storage_nolock().
-	 *
-	 * Hence, wait another rcu grace period for the storage to be freed.
-	 */
-	synchronize_rcu();
-
-	kvfree(smap->buckets);
-	bpf_map_area_free(smap);
-}
-
 int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 {
 	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
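With cache_idx_get()/cache_idx_free() now static and map_free moved behind the cache-aware entry point added in the final hunk below, per-flavor call sites collapse. A sketch of the before/after shape for a caller like bpf_sk_storage.c (the _old/_new suffixes are mine for contrast; sk_cache follows that file's naming):

	/* Before: each storage flavor juggled the cache index itself. */
	static struct bpf_map *sk_storage_map_alloc_old(union bpf_attr *attr)
	{
		struct bpf_local_storage_map *smap;

		smap = bpf_local_storage_map_alloc(attr);
		if (IS_ERR(smap))
			return ERR_CAST(smap);

		smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
		return &smap->map;
	}

	/* After: the common code owns the cache index. */
	static struct bpf_map *sk_storage_map_alloc_new(union bpf_attr *attr)
	{
		return bpf_local_storage_map_alloc(attr, &sk_cache);
	}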
@@ -614,7 +552,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 	return 0;
 }
 
-struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
+static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
 	unsigned int i;
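Renaming the raw allocator to __bpf_local_storage_map_alloc() frees the public name for the cache-aware wrapper added in the next hunk. Each flavor supplies its own cache, typically defined via the DEFINE_BPF_STORAGE_CACHE() macro in include/linux/bpf_local_storage.h; a rough sketch of what that macro expands to (hedged, not copied from the header):

	/* Roughly DEFINE_BPF_STORAGE_CACHE(sk_cache): a zeroed usage table
	 * with its spinlock statically initialized.
	 */
	static struct bpf_local_storage_cache sk_cache = {
		.idx_lock = __SPIN_LOCK_UNLOCKED(sk_cache.idx_lock),
	};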
@@ -664,3 +602,117 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
 
 	return 0;
 }
+
+bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
+{
+	struct bpf_local_storage_elem *selem;
+	bool free_storage = false;
+	struct hlist_node *n;
+
+	/* Neither the bpf_prog nor the bpf_map's syscall
+	 * could be modifying the local_storage->list now.
+	 * Thus, no elem can be added to or deleted from the
+	 * local_storage->list by the bpf_prog or by the bpf_map's syscall.
+	 *
+	 * It is racing with bpf_local_storage_map_free() alone
+	 * when unlinking elem from the local_storage->list and
+	 * the map's bucket->list.
+	 */
+	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+		/* Always unlink from map before unlinking from
+		 * local_storage.
+		 */
+		bpf_selem_unlink_map(selem);
+		/* If the local_storage list has only one element,
+		 * bpf_selem_unlink_storage_nolock() will return true.
+		 * Otherwise, it will return false. This loop intends to
+		 * remove all local storage, so the last iteration will set
+		 * free_storage to true.
+		 */
+		free_storage = bpf_selem_unlink_storage_nolock(
+			local_storage, selem, false, false);
+	}
+
+	return free_storage;
+}
+
+struct bpf_map *
+bpf_local_storage_map_alloc(union bpf_attr *attr,
+			    struct bpf_local_storage_cache *cache)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = __bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
+	return &smap->map;
+}
+
+void bpf_local_storage_map_free(struct bpf_map *map,
+				struct bpf_local_storage_cache *cache,
+				int __percpu *busy_counter)
+{
+	struct bpf_local_storage_map_bucket *b;
+	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_map *smap;
+	unsigned int i;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(cache, smap->cache_idx);
+
+	/* Note that this map might be concurrently cloned from
+	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+	 * RCU read section to finish before proceeding. New RCU
+	 * read sections should be prevented via bpf_map_inc_not_zero.
+	 */
+	synchronize_rcu();
+
+	/* bpf prog and the userspace can no longer access this map
+	 * now. No new selem (of this map) can be added
+	 * to the owner->storage or to the map bucket's list.
+	 *
+	 * The elem of this map can be cleaned up here
+	 * or when the storage is freed e.g.
+	 * by bpf_sk_storage_free() during __sk_destruct().
+	 */
+	for (i = 0; i < (1U << smap->bucket_log); i++) {
+		b = &smap->buckets[i];
+
+		rcu_read_lock();
+		/* No one is adding to b->list now */
+		while ((selem = hlist_entry_safe(
+				rcu_dereference_raw(hlist_first_rcu(&b->list)),
+				struct bpf_local_storage_elem, map_node))) {
+			if (busy_counter) {
+				migrate_disable();
+				this_cpu_inc(*busy_counter);
+			}
+			bpf_selem_unlink(selem, false);
+			if (busy_counter) {
+				this_cpu_dec(*busy_counter);
+				migrate_enable();
+			}
+			cond_resched_rcu();
+		}
+		rcu_read_unlock();
+	}
+
+	/* While freeing the storage we may still need to access the map.
+	 *
+	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map,
+	 * which then made the above while((selem = ...)) loop
+	 * exit immediately.
+	 *
+	 * However, while freeing the storage one still needs to access the
+	 * smap->elem_size to do the uncharging in
+	 * bpf_selem_unlink_storage_nolock().
+	 *
+	 * Hence, wait another rcu grace period for the storage to be freed.
+	 */
+	synchronize_rcu();
+
+	kvfree(smap->buckets);
+	bpf_map_area_free(smap);
+}
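With the new signatures, each flavor's free path reduces to a one-liner that hands its cache (and, where needed, its busy counter) to the common code. A hedged sketch of how callers such as bpf_sk_storage.c and bpf_task_storage.c are expected to look after this refactor (the _sketch suffixes are mine; cache and counter names follow those files):

	/* sk storage: no recursion fencing needed, so no busy counter. */
	static void sk_storage_map_free_sketch(struct bpf_map *map)
	{
		bpf_local_storage_map_free(map, &sk_cache, NULL);
	}

	/* task storage: pass the per-cpu busy counter so map teardown
	 * cooperates with bpf_task_storage_trylock() users.
	 */
	static void task_storage_map_free_sketch(struct bpf_map *map)
	{
		bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
	}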