16
16
#include <linux/vmalloc.h>
17
17
#include <linux/stddef.h>
18
18
#include <linux/err.h>
19
- #include <linux/percpu.h>
20
19
#include <linux/kernel.h>
21
20
#include <linux/netdevice.h>
22
21
#include <linux/slab.h>
/* Serializes event-cache global state changes (notifier registration etc.) —
 * NOTE(review): users of this mutex are outside the visible chunk; confirm.
 */
static DEFINE_MUTEX(nf_ct_ecache_mutex);

/* Nulls marker for the per-netns dying list and the local evicted list;
 * distinguishes their list ends from conntrack hash table nulls values.
 */
#define DYING_NULLS_VAL		((1 << 30) + 1)
/* Time budget for a single eviction pass before the work is rescheduled. */
#define ECACHE_MAX_JIFFIES	msecs_to_jiffies(10)
/* Retry delay used by ecache_work() when event delivery was congested. */
#define ECACHE_RETRY_JIFFIES	msecs_to_jiffies(10)
35
35
/* Outcome of one ecache_work_evict_list() pass over the dying list. */
enum retry_state {
	STATE_CONGESTED,	/* nf_conntrack_event() failed; retry after a delay */
	STATE_RESTART,		/* pass stopped early; reschedule with no delay */
	STATE_DONE,		/* dying list fully drained */
};
40
40
41
- static enum retry_state ecache_work_evict_list (struct ct_pcpu * pcpu )
41
+ static enum retry_state ecache_work_evict_list (struct nf_conntrack_net * cnet )
42
42
{
43
- struct nf_conn * refs [ECACHE_STACK_ALLOC ];
43
+ unsigned long stop = jiffies + ECACHE_MAX_JIFFIES ;
44
+ struct hlist_nulls_head evicted_list ;
44
45
enum retry_state ret = STATE_DONE ;
45
46
struct nf_conntrack_tuple_hash * h ;
46
47
struct hlist_nulls_node * n ;
47
- unsigned int evicted = 0 ;
48
+ unsigned int sent ;
48
49
49
- spin_lock ( & pcpu -> lock );
50
+ INIT_HLIST_NULLS_HEAD ( & evicted_list , DYING_NULLS_VAL );
50
51
51
- hlist_nulls_for_each_entry (h , n , & pcpu -> dying , hnnode ) {
52
+ next :
53
+ sent = 0 ;
54
+ spin_lock_bh (& cnet -> ecache .dying_lock );
55
+
56
+ hlist_nulls_for_each_entry_safe (h , n , & cnet -> ecache .dying_list , hnnode ) {
52
57
struct nf_conn * ct = nf_ct_tuplehash_to_ctrack (h );
53
- struct nf_conntrack_ecache * e ;
54
-
55
- if (!nf_ct_is_confirmed (ct ))
56
- continue ;
57
-
58
- /* This ecache access is safe because the ct is on the
59
- * pcpu dying list and we hold the spinlock -- the entry
60
- * cannot be free'd until after the lock is released.
61
- *
62
- * This is true even if ct has a refcount of 0: the
63
- * cpu that is about to free the entry must remove it
64
- * from the dying list and needs the lock to do so.
65
- */
66
- e = nf_ct_ecache_find (ct );
67
- if (!e || e -> state != NFCT_ECACHE_DESTROY_FAIL )
68
- continue ;
69
58
70
- /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
71
- * the worker owns this entry: the ct will remain valid
72
- * until the worker puts its ct reference.
59
+ /* The worker owns all entries, ct remains valid until nf_ct_put
60
+ * in the loop below.
73
61
*/
74
62
if (nf_conntrack_event (IPCT_DESTROY , ct )) {
75
63
ret = STATE_CONGESTED ;
76
64
break ;
77
65
}
78
66
79
- e -> state = NFCT_ECACHE_DESTROY_SENT ;
80
- refs [ evicted ] = ct ;
67
+ hlist_nulls_del_rcu ( & ct -> tuplehash [ IP_CT_DIR_ORIGINAL ]. hnnode ) ;
68
+ hlist_nulls_add_head ( & ct -> tuplehash [ IP_CT_DIR_REPLY ]. hnnode , & evicted_list ) ;
81
69
82
- if (++ evicted >= ARRAY_SIZE ( refs )) {
70
+ if (time_after ( stop , jiffies )) {
83
71
ret = STATE_RESTART ;
84
72
break ;
85
73
}
74
+
75
+ if (sent ++ > 16 ) {
76
+ spin_unlock_bh (& cnet -> ecache .dying_lock );
77
+ cond_resched ();
78
+ goto next ;
79
+ }
86
80
}
87
81
88
- spin_unlock ( & pcpu -> lock );
82
+ spin_unlock_bh ( & cnet -> ecache . dying_lock );
89
83
90
- /* can't _put while holding lock */
91
- while (evicted )
92
- nf_ct_put (refs [-- evicted ]);
84
+ hlist_nulls_for_each_entry_safe (h , n , & evicted_list , hnnode ) {
85
+ struct nf_conn * ct = nf_ct_tuplehash_to_ctrack (h );
86
+
87
+ hlist_nulls_add_fake (& ct -> tuplehash [IP_CT_DIR_ORIGINAL ].hnnode );
88
+ hlist_nulls_del_rcu (& ct -> tuplehash [IP_CT_DIR_REPLY ].hnnode );
89
+ nf_ct_put (ct );
90
+
91
+ cond_resched ();
92
+ }
93
93
94
94
return ret ;
95
95
}
96
96
97
97
static void ecache_work (struct work_struct * work )
98
98
{
99
99
struct nf_conntrack_net * cnet = container_of (work , struct nf_conntrack_net , ecache .dwork .work );
100
- struct netns_ct * ctnet = cnet -> ecache .ct_net ;
101
- int cpu , delay = -1 ;
102
- struct ct_pcpu * pcpu ;
103
-
104
- local_bh_disable ();
105
-
106
- for_each_possible_cpu (cpu ) {
107
- enum retry_state ret ;
108
-
109
- pcpu = per_cpu_ptr (ctnet -> pcpu_lists , cpu );
110
-
111
- ret = ecache_work_evict_list (pcpu );
112
-
113
- switch (ret ) {
114
- case STATE_CONGESTED :
115
- delay = ECACHE_RETRY_WAIT ;
116
- goto out ;
117
- case STATE_RESTART :
118
- delay = 0 ;
119
- break ;
120
- case STATE_DONE :
121
- break ;
122
- }
100
+ int ret , delay = -1 ;
101
+
102
+ ret = ecache_work_evict_list (cnet );
103
+ switch (ret ) {
104
+ case STATE_CONGESTED :
105
+ delay = ECACHE_RETRY_JIFFIES ;
106
+ break ;
107
+ case STATE_RESTART :
108
+ delay = 0 ;
109
+ break ;
110
+ case STATE_DONE :
111
+ break ;
123
112
}
124
113
125
- out :
126
- local_bh_enable ();
127
-
128
- ctnet -> ecache_dwork_pending = delay > 0 ;
129
114
if (delay >= 0 )
130
115
schedule_delayed_work (& cnet -> ecache .dwork , delay );
131
116
}
@@ -199,7 +184,6 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
199
184
*/
200
185
if (e -> portid == 0 && portid != 0 )
201
186
e -> portid = portid ;
202
- e -> state = NFCT_ECACHE_DESTROY_FAIL ;
203
187
}
204
188
205
189
return ret ;
@@ -297,8 +281,10 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
297
281
schedule_delayed_work (& cnet -> ecache .dwork , HZ );
298
282
net -> ct .ecache_dwork_pending = true;
299
283
} else if (state == NFCT_ECACHE_DESTROY_SENT ) {
300
- net -> ct .ecache_dwork_pending = false;
301
- mod_delayed_work (system_wq , & cnet -> ecache .dwork , 0 );
284
+ if (!hlist_nulls_empty (& cnet -> ecache .dying_list ))
285
+ mod_delayed_work (system_wq , & cnet -> ecache .dwork , 0 );
286
+ else
287
+ net -> ct .ecache_dwork_pending = false;
302
288
}
303
289
}
304
290
@@ -311,8 +297,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
311
297
312
298
net -> ct .sysctl_events = nf_ct_events ;
313
299
314
- cnet -> ecache .ct_net = & net -> ct ;
315
300
INIT_DELAYED_WORK (& cnet -> ecache .dwork , ecache_work );
301
+ INIT_HLIST_NULLS_HEAD (& cnet -> ecache .dying_list , DYING_NULLS_VAL );
302
+ spin_lock_init (& cnet -> ecache .dying_lock );
316
303
317
304
BUILD_BUG_ON (__IPCT_MAX >= 16 ); /* e->ctmask is u16 */
318
305
}
0 commit comments