@@ -350,7 +350,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
350
350
}
351
351
352
352
/*
353
- * This will be the memcg's index in each cache 's ->memcg_params.memcg_caches .
353
+ * This will be used as a shrinker list's index.
354
354
* The main reason for not using cgroup id for this:
355
355
* this works better in sparse environments, where we have a lot of memcgs,
356
356
* but only a few kmem-limited. Or also, if we have, for instance, 200
@@ -569,20 +569,16 @@ ino_t page_cgroup_ino(struct page *page)
569
569
unsigned long ino = 0 ;
570
570
571
571
rcu_read_lock ();
572
- if (PageSlab (page ) && !PageTail (page )) {
573
- memcg = memcg_from_slab_page (page );
574
- } else {
575
- memcg = page -> mem_cgroup ;
572
+ memcg = page -> mem_cgroup ;
576
573
577
- /*
578
- * The lowest bit set means that memcg isn't a valid
579
- * memcg pointer, but a obj_cgroups pointer.
580
- * In this case the page is shared and doesn't belong
581
- * to any specific memory cgroup.
582
- */
583
- if ((unsigned long ) memcg & 0x1UL )
584
- memcg = NULL ;
585
- }
574
+ /*
575
+ * The lowest bit set means that memcg isn't a valid
576
+ * memcg pointer, but an obj_cgroups pointer.
577
+ * In this case the page is shared and doesn't belong
578
+ * to any specific memory cgroup.
579
+ */
580
+ if ((unsigned long ) memcg & 0x1UL )
581
+ memcg = NULL ;
586
582
587
583
while (memcg && !(memcg -> css .flags & CSS_ONLINE ))
588
584
memcg = parent_mem_cgroup (memcg );
@@ -2822,12 +2818,18 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p)
2822
2818
page = virt_to_head_page (p );
2823
2819
2824
2820
/*
2825
- * Slab pages don't have page->mem_cgroup set because corresponding
2826
- * kmem caches can be reparented during the lifetime. That's why
2827
- * memcg_from_slab_page() should be used instead .
2821
+ * Slab objects are accounted individually, not per-page.
2822
+ * Memcg membership data for each individual object is saved in
2823
+ * page->obj_cgroups.
2828
2824
*/
2829
- if (PageSlab (page ))
2830
- return memcg_from_slab_page (page );
2825
+ if (page_has_obj_cgroups (page )) {
2826
+ struct obj_cgroup * objcg ;
2827
+ unsigned int off ;
2828
+
2829
+ off = obj_to_index (page -> slab_cache , page , p );
2830
+ objcg = page_obj_cgroups (page )[off ];
2831
+ return obj_cgroup_memcg (objcg );
2832
+ }
2831
2833
2832
2834
/* All other pages use page->mem_cgroup */
2833
2835
return page -> mem_cgroup ;
@@ -2882,9 +2884,7 @@ static int memcg_alloc_cache_id(void)
2882
2884
else if (size > MEMCG_CACHES_MAX_SIZE )
2883
2885
size = MEMCG_CACHES_MAX_SIZE ;
2884
2886
2885
- err = memcg_update_all_caches (size );
2886
- if (!err )
2887
- err = memcg_update_all_list_lrus (size );
2887
+ err = memcg_update_all_list_lrus (size );
2888
2888
if (!err )
2889
2889
memcg_nr_cache_ids = size ;
2890
2890
@@ -2903,7 +2903,6 @@ static void memcg_free_cache_id(int id)
2903
2903
}
2904
2904
2905
2905
struct memcg_kmem_cache_create_work {
2906
- struct mem_cgroup * memcg ;
2907
2906
struct kmem_cache * cachep ;
2908
2907
struct work_struct work ;
2909
2908
};
@@ -2912,136 +2911,51 @@ static void memcg_kmem_cache_create_func(struct work_struct *w)
2912
2911
{
2913
2912
struct memcg_kmem_cache_create_work * cw =
2914
2913
container_of (w , struct memcg_kmem_cache_create_work , work );
2915
- struct mem_cgroup * memcg = cw -> memcg ;
2916
2914
struct kmem_cache * cachep = cw -> cachep ;
2917
2915
2918
- memcg_create_kmem_cache (memcg , cachep );
2916
+ memcg_create_kmem_cache (cachep );
2919
2917
2920
- css_put (& memcg -> css );
2921
2918
kfree (cw );
2922
2919
}
2923
2920
2924
2921
/*
2925
2922
* Enqueue the creation of a per-memcg kmem_cache.
2926
2923
*/
2927
- static void memcg_schedule_kmem_cache_create (struct mem_cgroup * memcg ,
2928
- struct kmem_cache * cachep )
2924
+ static void memcg_schedule_kmem_cache_create (struct kmem_cache * cachep )
2929
2925
{
2930
2926
struct memcg_kmem_cache_create_work * cw ;
2931
2927
2932
- if (!css_tryget_online (& memcg -> css ))
2933
- return ;
2934
-
2935
2928
cw = kmalloc (sizeof (* cw ), GFP_NOWAIT | __GFP_NOWARN );
2936
- if (!cw ) {
2937
- css_put (& memcg -> css );
2929
+ if (!cw )
2938
2930
return ;
2939
- }
2940
2931
2941
- cw -> memcg = memcg ;
2942
2932
cw -> cachep = cachep ;
2943
2933
INIT_WORK (& cw -> work , memcg_kmem_cache_create_func );
2944
2934
2945
2935
queue_work (memcg_kmem_cache_wq , & cw -> work );
2946
2936
}
2947
2937
2948
2938
/**
2949
- * memcg_kmem_get_cache: select the correct per-memcg cache for allocation
2939
+ * memcg_kmem_get_cache: select memcg or root cache for allocation
2950
2940
* @cachep: the original global kmem cache
2951
2941
*
2952
2942
* Return the kmem_cache we're supposed to use for a slab allocation.
2953
- * We try to use the current memcg's version of the cache.
2954
2943
*
2955
2944
* If the cache does not exist yet, if we are the first user of it, we
2956
2945
* create it asynchronously in a workqueue and let the current allocation
2957
2946
* go through with the original cache.
2958
- *
2959
- * This function takes a reference to the cache it returns to assure it
2960
- * won't get destroyed while we are working with it. Once the caller is
2961
- * done with it, memcg_kmem_put_cache() must be called to release the
2962
- * reference.
2963
2947
*/
2964
- struct kmem_cache * memcg_kmem_get_cache (struct kmem_cache * cachep ,
2965
- struct obj_cgroup * * objcgp )
2948
+ struct kmem_cache * memcg_kmem_get_cache (struct kmem_cache * cachep )
2966
2949
{
2967
- struct mem_cgroup * memcg ;
2968
2950
struct kmem_cache * memcg_cachep ;
2969
- struct memcg_cache_array * arr ;
2970
- int kmemcg_id ;
2971
2951
2972
- VM_BUG_ON (! is_root_cache ( cachep ) );
2973
-
2974
- if ( memcg_kmem_bypass ())
2952
+ memcg_cachep = READ_ONCE ( cachep -> memcg_params . memcg_cache );
2953
+ if ( unlikely (! memcg_cachep )) {
2954
+ memcg_schedule_kmem_cache_create ( cachep );
2975
2955
return cachep ;
2976
-
2977
- rcu_read_lock ();
2978
-
2979
- if (unlikely (current -> active_memcg ))
2980
- memcg = current -> active_memcg ;
2981
- else
2982
- memcg = mem_cgroup_from_task (current );
2983
-
2984
- if (!memcg || memcg == root_mem_cgroup )
2985
- goto out_unlock ;
2986
-
2987
- kmemcg_id = READ_ONCE (memcg -> kmemcg_id );
2988
- if (kmemcg_id < 0 )
2989
- goto out_unlock ;
2990
-
2991
- arr = rcu_dereference (cachep -> memcg_params .memcg_caches );
2992
-
2993
- /*
2994
- * Make sure we will access the up-to-date value. The code updating
2995
- * memcg_caches issues a write barrier to match the data dependency
2996
- * barrier inside READ_ONCE() (see memcg_create_kmem_cache()).
2997
- */
2998
- memcg_cachep = READ_ONCE (arr -> entries [kmemcg_id ]);
2999
-
3000
- /*
3001
- * If we are in a safe context (can wait, and not in interrupt
3002
- * context), we could be be predictable and return right away.
3003
- * This would guarantee that the allocation being performed
3004
- * already belongs in the new cache.
3005
- *
3006
- * However, there are some clashes that can arrive from locking.
3007
- * For instance, because we acquire the slab_mutex while doing
3008
- * memcg_create_kmem_cache, this means no further allocation
3009
- * could happen with the slab_mutex held. So it's better to
3010
- * defer everything.
3011
- *
3012
- * If the memcg is dying or memcg_cache is about to be released,
3013
- * don't bother creating new kmem_caches. Because memcg_cachep
3014
- * is ZEROed as the fist step of kmem offlining, we don't need
3015
- * percpu_ref_tryget_live() here. css_tryget_online() check in
3016
- * memcg_schedule_kmem_cache_create() will prevent us from
3017
- * creation of a new kmem_cache.
3018
- */
3019
- if (unlikely (!memcg_cachep ))
3020
- memcg_schedule_kmem_cache_create (memcg , cachep );
3021
- else if (percpu_ref_tryget (& memcg_cachep -> memcg_params .refcnt )) {
3022
- struct obj_cgroup * objcg = rcu_dereference (memcg -> objcg );
3023
-
3024
- if (!objcg || !obj_cgroup_tryget (objcg )) {
3025
- percpu_ref_put (& memcg_cachep -> memcg_params .refcnt );
3026
- goto out_unlock ;
3027
- }
3028
-
3029
- * objcgp = objcg ;
3030
- cachep = memcg_cachep ;
3031
2956
}
3032
- out_unlock :
3033
- rcu_read_unlock ();
3034
- return cachep ;
3035
- }
3036
2957
3037
- /**
3038
- * memcg_kmem_put_cache: drop reference taken by memcg_kmem_get_cache
3039
- * @cachep: the cache returned by memcg_kmem_get_cache
3040
- */
3041
- void memcg_kmem_put_cache (struct kmem_cache * cachep )
3042
- {
3043
- if (!is_root_cache (cachep ))
3044
- percpu_ref_put (& cachep -> memcg_params .refcnt );
2958
+ return memcg_cachep ;
3045
2959
}
3046
2960
3047
2961
/**
@@ -3731,7 +3645,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
3731
3645
*/
3732
3646
memcg -> kmemcg_id = memcg_id ;
3733
3647
memcg -> kmem_state = KMEM_ONLINE ;
3734
- INIT_LIST_HEAD (& memcg -> kmem_caches );
3735
3648
3736
3649
return 0 ;
3737
3650
}
@@ -3744,22 +3657,13 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
3744
3657
3745
3658
if (memcg -> kmem_state != KMEM_ONLINE )
3746
3659
return ;
3747
- /*
3748
- * Clear the online state before clearing memcg_caches array
3749
- * entries. The slab_mutex in memcg_deactivate_kmem_caches()
3750
- * guarantees that no cache will be created for this cgroup
3751
- * after we are done (see memcg_create_kmem_cache()).
3752
- */
3660
+
3753
3661
memcg -> kmem_state = KMEM_ALLOCATED ;
3754
3662
3755
3663
parent = parent_mem_cgroup (memcg );
3756
3664
if (!parent )
3757
3665
parent = root_mem_cgroup ;
3758
3666
3759
- /*
3760
- * Deactivate and reparent kmem_caches and objcgs.
3761
- */
3762
- memcg_deactivate_kmem_caches (memcg , parent );
3763
3667
memcg_reparent_objcgs (memcg , parent );
3764
3668
3765
3669
kmemcg_id = memcg -> kmemcg_id ;
@@ -5384,9 +5288,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
5384
5288
5385
5289
/* The following stuff does not apply to the root */
5386
5290
if (!parent ) {
5387
- #ifdef CONFIG_MEMCG_KMEM
5388
- INIT_LIST_HEAD (& memcg -> kmem_caches );
5389
- #endif
5390
5291
root_mem_cgroup = memcg ;
5391
5292
return & memcg -> css ;
5392
5293
}
0 commit comments