Skip to content

Commit c9c5962

Browse files
committed
mac80211: enable collecting station statistics per-CPU
If the driver advertises the new HW flag USES_RSS, make the station statistics on the fast-rx path per-CPU. This will enable calling the RX in parallel, only hitting locking or shared cachelines when the fast-RX path isn't available. Signed-off-by: Johannes Berg <[email protected]>
1 parent 49ddf8e commit c9c5962

File tree

5 files changed

+142
-46
lines changed

5 files changed

+142
-46
lines changed

include/net/mac80211.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,6 +1980,9 @@ struct ieee80211_txq {
19801980
* order and does not need to manage its own reorder buffer or BA session
19811981
* timeout.
19821982
*
1983+
* @IEEE80211_HW_USES_RSS: The device uses RSS and thus requires parallel RX,
1984+
* which implies using per-CPU station statistics.
1985+
*
19831986
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
19841987
*/
19851988
enum ieee80211_hw_flags {
@@ -2017,6 +2020,7 @@ enum ieee80211_hw_flags {
20172020
IEEE80211_HW_BEACON_TX_STATUS,
20182021
IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR,
20192022
IEEE80211_HW_SUPPORTS_REORDERING_BUFFER,
2023+
IEEE80211_HW_USES_RSS,
20202024

20212025
/* keep last, obviously */
20222026
NUM_IEEE80211_HW_FLAGS

net/mac80211/debugfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ static const char *hw_flag_names[] = {
127127
FLAG(BEACON_TX_STATUS),
128128
FLAG(NEEDS_UNIQUE_STA_ADDR),
129129
FLAG(SUPPORTS_REORDERING_BUFFER),
130+
FLAG(USES_RSS),
130131
#undef FLAG
131132
};
132133

net/mac80211/rx.c

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3528,6 +3528,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
35283528
ether_addr_copy(fastrx.rfc1042_hdr, rfc1042_header);
35293529
ether_addr_copy(fastrx.vif_addr, sdata->vif.addr);
35303530

3531+
fastrx.uses_rss = ieee80211_hw_check(&local->hw, USES_RSS);
3532+
35313533
/* fast-rx doesn't do reordering */
35323534
if (ieee80211_hw_check(&local->hw, AMPDU_AGGREGATION) &&
35333535
!ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER))
@@ -3678,6 +3680,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
36783680
u8 da[ETH_ALEN];
36793681
u8 sa[ETH_ALEN];
36803682
} addrs __aligned(2);
3683+
struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
3684+
3685+
if (fast_rx->uses_rss)
3686+
stats = this_cpu_ptr(sta->pcpu_rx_stats);
36813687

36823688
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
36833689
* to a common data structure; drivers can implement that per queue
@@ -3759,29 +3765,32 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
37593765
}
37603766

37613767
/* statistics part of ieee80211_rx_h_sta_process() */
3762-
sta->rx_stats.last_rx = jiffies;
3763-
sta->rx_stats.last_rate = sta_stats_encode_rate(status);
3768+
stats->last_rx = jiffies;
3769+
stats->last_rate = sta_stats_encode_rate(status);
37643770

3765-
sta->rx_stats.fragments++;
3771+
stats->fragments++;
37663772

37673773
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
3768-
sta->rx_stats.last_signal = status->signal;
3769-
ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal);
3774+
stats->last_signal = status->signal;
3775+
if (!fast_rx->uses_rss)
3776+
ewma_signal_add(&sta->rx_stats_avg.signal,
3777+
-status->signal);
37703778
}
37713779

37723780
if (status->chains) {
37733781
int i;
37743782

3775-
sta->rx_stats.chains = status->chains;
3783+
stats->chains = status->chains;
37763784
for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
37773785
int signal = status->chain_signal[i];
37783786

37793787
if (!(status->chains & BIT(i)))
37803788
continue;
37813789

3782-
sta->rx_stats.chain_signal_last[i] = signal;
3783-
ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
3784-
-signal);
3790+
stats->chain_signal_last[i] = signal;
3791+
if (!fast_rx->uses_rss)
3792+
ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
3793+
-signal);
37853794
}
37863795
}
37873796
/* end of statistics */
@@ -3806,10 +3815,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
38063815
* for non-QoS-data frames. Here we know it's a data
38073816
* frame, so count MSDUs.
38083817
*/
3809-
u64_stats_update_begin(&sta->rx_stats.syncp);
3810-
sta->rx_stats.msdu[rx->seqno_idx]++;
3811-
sta->rx_stats.bytes += orig_len;
3812-
u64_stats_update_end(&sta->rx_stats.syncp);
3818+
u64_stats_update_begin(&stats->syncp);
3819+
stats->msdu[rx->seqno_idx]++;
3820+
stats->bytes += orig_len;
3821+
u64_stats_update_end(&stats->syncp);
38133822

38143823
if (fast_rx->internal_forward) {
38153824
struct sta_info *dsta = sta_info_get(rx->sdata, skb->data);
@@ -3840,7 +3849,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
38403849
return true;
38413850
drop:
38423851
dev_kfree_skb(skb);
3843-
sta->rx_stats.dropped++;
3852+
stats->dropped++;
38443853
return true;
38453854
}
38463855

net/mac80211/sta_info.c

Lines changed: 92 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
254254
#ifdef CONFIG_MAC80211_MESH
255255
kfree(sta->mesh);
256256
#endif
257+
free_percpu(sta->pcpu_rx_stats);
257258
kfree(sta);
258259
}
259260

@@ -311,6 +312,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
311312
if (!sta)
312313
return NULL;
313314

315+
if (ieee80211_hw_check(hw, USES_RSS)) {
316+
sta->pcpu_rx_stats =
317+
alloc_percpu(struct ieee80211_sta_rx_stats);
318+
if (!sta->pcpu_rx_stats)
319+
goto free;
320+
}
321+
314322
spin_lock_init(&sta->lock);
315323
spin_lock_init(&sta->ps_lock);
316324
INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames);
@@ -1932,6 +1940,28 @@ u8 sta_info_tx_streams(struct sta_info *sta)
19321940
>> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1;
19331941
}
19341942

1943+
/*
 * sta_get_last_rx_stats - pick the most recently updated RX statistics block
 * @sta: station whose RX statistics are being queried
 *
 * When the hardware does not advertise USES_RSS, this simply returns
 * &sta->rx_stats.  Otherwise it walks sta->pcpu_rx_stats for every possible
 * CPU and returns whichever block (starting from &sta->rx_stats as the
 * initial candidate) has the latest last_rx according to time_after().
 *
 * Returns a pointer into @sta (or its per-CPU area); nothing is copied.
 */
static struct ieee80211_sta_rx_stats *
1944+
sta_get_last_rx_stats(struct sta_info *sta)
1945+
{
1946+
struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
1947+
struct ieee80211_local *local = sta->local;
1948+
int cpu;
1949+
1950+
/* no per-CPU statistics in use: the shared block is the only candidate */
if (!ieee80211_hw_check(&local->hw, USES_RSS))
1951+
return stats;
1952+
1953+
for_each_possible_cpu(cpu) {
1954+
struct ieee80211_sta_rx_stats *cpustats;
1955+
1956+
cpustats = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
1957+
1958+
/* keep whichever block saw a frame most recently */
if (time_after(cpustats->last_rx, stats->last_rx))
1959+
stats = cpustats;
1960+
}
1961+
1962+
return stats;
1963+
}
1964+
19351965
static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
19361966
struct rate_info *rinfo)
19371967
{
@@ -1967,7 +1997,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
19671997

19681998
static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
19691999
{
1970-
u16 rate = ACCESS_ONCE(sta->rx_stats.last_rate);
2000+
u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate);
19712001

19722002
if (rate == STA_STATS_RATE_INVALID)
19732003
rinfo->flags = 0;
@@ -2010,13 +2040,29 @@ static void sta_set_tidstats(struct sta_info *sta,
20102040
}
20112041
}
20122042

2043+
/*
 * sta_get_stats_bytes - read the byte counter of one RX statistics block
 * @rxstats: statistics block to read (shared or one CPU's copy)
 *
 * Reads rxstats->bytes between u64_stats_fetch_begin() and
 * u64_stats_fetch_retry() on rxstats->syncp, repeating the read for as long
 * as the retry check asks for it, and returns the value obtained.
 * NOTE(review): presumably this guards against a torn 64-bit read racing
 * with the writer that updates bytes under the same syncp - confirm against
 * the u64_stats_sync documentation.
 */
static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats)
2044+
{
2045+
unsigned int start;
2046+
u64 value;
2047+
2048+
do {
2049+
start = u64_stats_fetch_begin(&rxstats->syncp);
2050+
value = rxstats->bytes;
2051+
} while (u64_stats_fetch_retry(&rxstats->syncp, start));
2052+
2053+
return value;
2054+
}
2055+
20132056
void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
20142057
{
20152058
struct ieee80211_sub_if_data *sdata = sta->sdata;
20162059
struct ieee80211_local *local = sdata->local;
20172060
struct rate_control_ref *ref = NULL;
20182061
u32 thr = 0;
2019-
int i, ac;
2062+
int i, ac, cpu;
2063+
struct ieee80211_sta_rx_stats *last_rxstats;
2064+
2065+
last_rxstats = sta_get_last_rx_stats(sta);
20202066

20212067
if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
20222068
ref = local->rate_ctrl;
@@ -2064,17 +2110,30 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
20642110

20652111
if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) |
20662112
BIT(NL80211_STA_INFO_RX_BYTES)))) {
2067-
unsigned int start;
2113+
sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
2114+
2115+
if (sta->pcpu_rx_stats) {
2116+
for_each_possible_cpu(cpu) {
2117+
struct ieee80211_sta_rx_stats *cpurxs;
2118+
2119+
cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
2120+
sinfo->rx_bytes += sta_get_stats_bytes(cpurxs);
2121+
}
2122+
}
20682123

2069-
do {
2070-
start = u64_stats_fetch_begin(&sta->rx_stats.syncp);
2071-
sinfo->rx_bytes = sta->rx_stats.bytes;
2072-
} while (u64_stats_fetch_retry(&sta->rx_stats.syncp, start));
20732124
sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
20742125
}
20752126

20762127
if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) {
20772128
sinfo->rx_packets = sta->rx_stats.packets;
2129+
if (sta->pcpu_rx_stats) {
2130+
for_each_possible_cpu(cpu) {
2131+
struct ieee80211_sta_rx_stats *cpurxs;
2132+
2133+
cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
2134+
sinfo->rx_packets += cpurxs->packets;
2135+
}
2136+
}
20782137
sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
20792138
}
20802139

@@ -2089,6 +2148,14 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
20892148
}
20902149

20912150
sinfo->rx_dropped_misc = sta->rx_stats.dropped;
2151+
if (sta->pcpu_rx_stats) {
2152+
for_each_possible_cpu(cpu) {
2153+
struct ieee80211_sta_rx_stats *cpurxs;
2154+
2155+
cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
2156+
/* per-CPU drops extend rx_dropped_misc (seeded from sta->rx_stats.dropped), not rx_packets */
sinfo->rx_dropped_misc += cpurxs->dropped;
2157+
}
2158+
}
20922159

20932160
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
20942161
!(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) {
@@ -2100,27 +2167,34 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
21002167
if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
21012168
ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
21022169
if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
2103-
sinfo->signal = (s8)sta->rx_stats.last_signal;
2170+
sinfo->signal = (s8)last_rxstats->last_signal;
21042171
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
21052172
}
21062173

2107-
if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
2174+
if (!sta->pcpu_rx_stats &&
2175+
!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
21082176
sinfo->signal_avg =
21092177
-ewma_signal_read(&sta->rx_stats_avg.signal);
21102178
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
21112179
}
21122180
}
21132181

2114-
if (sta->rx_stats.chains &&
2182+
/* for the average - if pcpu_rx_stats isn't set - last_rxstats must point to
2183+
* the sta->rx_stats struct, so the check here is fine with and without
2184+
* pcpu statistics
2185+
*/
2186+
if (last_rxstats->chains &&
21152187
!(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
21162188
BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
2117-
sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
2118-
BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
2189+
sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL);
2190+
if (!sta->pcpu_rx_stats)
2191+
sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
2192+
2193+
sinfo->chains = last_rxstats->chains;
21192194

2120-
sinfo->chains = sta->rx_stats.chains;
21212195
for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
21222196
sinfo->chain_signal[i] =
2123-
sta->rx_stats.chain_signal_last[i];
2197+
last_rxstats->chain_signal_last[i];
21242198
sinfo->chain_signal_avg[i] =
21252199
-ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]);
21262200
}
@@ -2213,7 +2287,9 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
22132287

22142288
unsigned long ieee80211_sta_last_active(struct sta_info *sta)
22152289
{
2216-
if (time_after(sta->rx_stats.last_rx, sta->status_stats.last_ack))
2217-
return sta->rx_stats.last_rx;
2290+
struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
2291+
2292+
if (time_after(stats->last_rx, sta->status_stats.last_ack))
2293+
return stats->last_rx;
22182294
return sta->status_stats.last_ack;
22192295
}

net/mac80211/sta_info.h

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ struct ieee80211_fast_tx {
297297
* @key: bool indicating encryption is expected (key is set)
298298
* @sta_notify: notify the MLME code (once)
299299
* @internal_forward: forward frames internally on AP/VLAN type interfaces
300+
* @uses_rss: copy of USES_RSS hw flag
300301
* @da_offs: offset of the DA in the header (for header conversion)
301302
* @sa_offs: offset of the SA in the header (for header conversion)
302303
* @rcu_head: RCU head for freeing this structure
@@ -311,7 +312,8 @@ struct ieee80211_fast_rx {
311312
u8 icv_len;
312313
u8 key:1,
313314
sta_notify:1,
314-
internal_forward:1;
315+
internal_forward:1,
316+
uses_rss:1;
315317
u8 da_offs, sa_offs;
316318

317319
struct rcu_head rcu_head;
@@ -367,6 +369,21 @@ struct mesh_sta {
367369

368370
DECLARE_EWMA(signal, 1024, 8)
369371

372+
/**
 * struct ieee80211_sta_rx_stats - RX statistics for one station
 *
 * Used both as the shared sta_info.rx_stats member and as the element type
 * of the per-CPU sta_info.pcpu_rx_stats allocation.
 *
 * @packets: received packet count; summed over the shared and per-CPU
 *	blocks when reported as rx_packets
 * @last_rx: jiffies value recorded when a frame was last accounted here
 * @num_duplicates: duplicate counter (NOTE(review): no user is visible in
 *	this change - confirm semantics against rx.c)
 * @fragments: incremented once per frame in the fast-rx statistics section
 * @dropped: incremented when the fast-rx path drops a frame; reported as
 *	rx_dropped_misc
 * @last_signal: status->signal of the last frame that carried a valid
 *	signal value (RX_FLAG_NO_SIGNAL_VAL not set)
 * @chains: status->chains bitmap of the last frame that reported chains
 * @chain_signal_last: last per-chain signal, written only for chains whose
 *	bit is set in the frame's chains bitmap
 * @last_rate: last RX rate as encoded by sta_stats_encode_rate()
 * @syncp: u64_stats sync point wrapped around updates of @bytes and @msdu
 * @bytes: received byte count, updated under @syncp
 * @msdu: MSDU counters indexed by the RX seqno_idx, updated under @syncp
 */
struct ieee80211_sta_rx_stats {
373+
unsigned long packets;
374+
unsigned long last_rx;
375+
unsigned long num_duplicates;
376+
unsigned long fragments;
377+
unsigned long dropped;
378+
int last_signal;
379+
u8 chains;
380+
s8 chain_signal_last[IEEE80211_MAX_CHAINS];
381+
u16 last_rate;
382+
struct u64_stats_sync syncp;
383+
u64 bytes;
384+
u64 msdu[IEEE80211_NUM_TIDS + 1];
385+
};
386+
370387
/**
371388
* struct sta_info - STA information
372389
*
@@ -428,6 +445,8 @@ DECLARE_EWMA(signal, 1024, 8)
428445
* the BSS one.
429446
* @tx_stats: TX statistics
430447
* @rx_stats: RX statistics
448+
* @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
449+
* this (by advertising the USES_RSS hw flag)
431450
* @status_stats: TX status statistics
432451
*/
433452
struct sta_info {
@@ -448,6 +467,7 @@ struct sta_info {
448467

449468
struct ieee80211_fast_tx __rcu *fast_tx;
450469
struct ieee80211_fast_rx __rcu *fast_rx;
470+
struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats;
451471

452472
#ifdef CONFIG_MAC80211_MESH
453473
struct mesh_sta *mesh;
@@ -477,21 +497,7 @@ struct sta_info {
477497
long last_connected;
478498

479499
/* Updated from RX path only, no locking requirements */
480-
struct {
481-
unsigned long packets;
482-
unsigned long last_rx;
483-
unsigned long num_duplicates;
484-
unsigned long fragments;
485-
unsigned long dropped;
486-
int last_signal;
487-
u8 chains;
488-
s8 chain_signal_last[IEEE80211_MAX_CHAINS];
489-
u16 last_rate;
490-
491-
struct u64_stats_sync syncp;
492-
u64 bytes;
493-
u64 msdu[IEEE80211_NUM_TIDS + 1];
494-
} rx_stats;
500+
struct ieee80211_sta_rx_stats rx_stats;
495501
struct {
496502
struct ewma_signal signal;
497503
struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS];

0 commit comments

Comments
 (0)