
Commit b5a54d9

edumazet authored and davem330 committed
mlx4: use order-0 pages for RX
Use of order-3 pages is problematic in some cases. This patch might add three kinds of regression:

1) A CPU performance regression, but we will add page recycling later and performance should come back.

2) The TCP receiver could grow its receive window slightly more slowly, because the skb->len/skb->truesize ratio will decrease. This is mostly OK; we prefer being conservative so as not to risk OOM, and we can eventually tune TCP better in the future. This is consistent with other drivers using 2048 bytes per ethernet frame.

3) Because we allocate one page per RX slot, we consume more memory for the ring buffers. XDP already had this constraint anyway.

Signed-off-by: Eric Dumazet <[email protected]>
Acked-by: Tariq Toukan <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
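Editor's note, to make point 2 concrete (a rough worked example, assuming a 4 KB page and 64-byte cache lines as in the comment added to mlx4_en_calc_rx_buf() below): a ~1536-byte frame only packs twice into a 4 KB page, so its RX stride is padded from 1536 up to 2048 bytes and skb->truesize grows with it, dropping skb->len/skb->truesize from roughly 1.0 to about 1536/2048 ≈ 0.75. With the previous order-3 (32 KB) pages the stride could stay at 1536, which is why the receive window may now grow slightly more slowly.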
1 parent 60c7f5a commit b5a54d9

File tree: 2 files changed (+33, −44 lines)

drivers/net/ethernet/mellanox/mlx4/en_rx.c

Lines changed: 33 additions & 39 deletions
@@ -53,38 +53,26 @@
 static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
 			    struct mlx4_en_rx_alloc *page_alloc,
 			    const struct mlx4_en_frag_info *frag_info,
-			    gfp_t _gfp)
+			    gfp_t gfp)
 {
-	int order;
 	struct page *page;
 	dma_addr_t dma;
 
-	for (order = priv->rx_page_order; ;) {
-		gfp_t gfp = _gfp;
-
-		if (order)
-			gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC;
-		page = alloc_pages(gfp, order);
-		if (likely(page))
-			break;
-		if (--order < 0 ||
-		    ((PAGE_SIZE << order) < frag_info->frag_size))
-			return -ENOMEM;
-	}
-	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
-			   priv->dma_dir);
+	page = alloc_page(gfp);
+	if (unlikely(!page))
+		return -ENOMEM;
+	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
 	if (unlikely(dma_mapping_error(priv->ddev, dma))) {
 		put_page(page);
 		return -ENOMEM;
 	}
-	page_alloc->page_size = PAGE_SIZE << order;
 	page_alloc->page = page;
 	page_alloc->dma = dma;
 	page_alloc->page_offset = 0;
 	/* Not doing get_page() for each frag is a big win
 	 * on asymetric workloads. Note we can not use atomic_set().
 	 */
-	page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1);
+	page_ref_add(page, PAGE_SIZE / frag_info->frag_stride - 1);
 	return 0;
 }
 
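The page_ref_add() call kept in this hunk is the central trick: instead of one get_page() per RX fragment, the page refcount is raised once, at allocation time, by the number of extra fragments the page will serve (PAGE_SIZE / frag_stride - 1), and each consumer later drops exactly one reference. The following minimal userspace C sketch shows that batched-refcount pattern; the names (rx_page, rx_page_alloc, rx_page_put) and the fixed 4 KB / 2 KB sizes are illustrative assumptions, not the driver's API.

/* Batched-refcount sketch: one atomic add covers all fragments carved
 * from the page; each fragment consumer later drops exactly one ref.
 * (Hypothetical userspace analogue, not mlx4 code.)
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE	4096
#define FRAG_STRIDE	2048		/* two fragments per 4 KB page */

struct rx_page {
	void *buf;
	atomic_int refcount;
};

static struct rx_page *rx_page_alloc(void)
{
	int nfrags = PAGE_SIZE / FRAG_STRIDE;
	struct rx_page *p = calloc(1, sizeof(*p));

	if (!p || !(p->buf = malloc(PAGE_SIZE))) {
		free(p);
		return NULL;
	}
	/* start with one reference, then add nfrags - 1 in a single
	 * operation instead of nfrags separate "get" calls
	 */
	atomic_init(&p->refcount, 1);
	atomic_fetch_add(&p->refcount, nfrags - 1);
	return p;
}

static void rx_page_put(struct rx_page *p)
{
	/* dropping the last reference frees the page */
	if (atomic_fetch_sub(&p->refcount, 1) == 1) {
		free(p->buf);
		free(p);
	}
}

int main(void)
{
	struct rx_page *p = rx_page_alloc();

	if (!p)
		return 1;
	printf("refcount after alloc: %d\n", atomic_load(&p->refcount));
	rx_page_put(p);	/* fragment 0 consumed */
	rx_page_put(p);	/* fragment 1 consumed, page freed here */
	return 0;
}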

@@ -105,7 +93,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 		page_alloc[i].page_offset += frag_info->frag_stride;
 
 		if (page_alloc[i].page_offset + frag_info->frag_stride <=
-		    ring_alloc[i].page_size)
+		    PAGE_SIZE)
 			continue;
 
 		if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
@@ -127,11 +115,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 	while (i--) {
 		if (page_alloc[i].page != ring_alloc[i].page) {
 			dma_unmap_page(priv->ddev, page_alloc[i].dma,
-				       page_alloc[i].page_size,
-				       priv->dma_dir);
+				       PAGE_SIZE, priv->dma_dir);
 			page = page_alloc[i].page;
 			/* Revert changes done by mlx4_alloc_pages */
-			page_ref_sub(page, page_alloc[i].page_size /
+			page_ref_sub(page, PAGE_SIZE /
 					   priv->frag_info[i].frag_stride - 1);
 			put_page(page);
 		}
@@ -147,8 +134,8 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
 	u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
 
 
-	if (next_frag_end > frags[i].page_size)
-		dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
+	if (next_frag_end > PAGE_SIZE)
+		dma_unmap_page(priv->ddev, frags[i].dma, PAGE_SIZE,
 			       priv->dma_dir);
 
 	if (frags[i].page)
@@ -168,9 +155,8 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
 				     frag_info, GFP_KERNEL | __GFP_COLD))
 			goto out;
 
-		en_dbg(DRV, priv, "  frag %d allocator: - size:%d frags:%d\n",
-		       i, ring->page_alloc[i].page_size,
-		       page_ref_count(ring->page_alloc[i].page));
+		en_dbg(DRV, priv, "  frag %d allocator: - frags:%d\n",
+		       i, page_ref_count(ring->page_alloc[i].page));
 	}
 	return 0;
 
@@ -180,11 +166,10 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
 
 		page_alloc = &ring->page_alloc[i];
 		dma_unmap_page(priv->ddev, page_alloc->dma,
-			       page_alloc->page_size,
-			       priv->dma_dir);
+			       PAGE_SIZE, priv->dma_dir);
 		page = page_alloc->page;
 		/* Revert changes done by mlx4_alloc_pages */
-		page_ref_sub(page, page_alloc->page_size /
+		page_ref_sub(page, PAGE_SIZE /
 				   priv->frag_info[i].frag_stride - 1);
 		put_page(page);
 		page_alloc->page = NULL;
@@ -206,9 +191,9 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
 		       i, page_count(page_alloc->page));
 
 		dma_unmap_page(priv->ddev, page_alloc->dma,
-			       page_alloc->page_size, priv->dma_dir);
+			       PAGE_SIZE, priv->dma_dir);
 		while (page_alloc->page_offset + frag_info->frag_stride <
-		       page_alloc->page_size) {
+		       PAGE_SIZE) {
 			put_page(page_alloc->page);
 			page_alloc->page_offset += frag_info->frag_stride;
 		}
@@ -1191,7 +1176,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
 	 * This only works when num_frags == 1.
 	 */
 	if (priv->tx_ring_num[TX_XDP]) {
-		priv->rx_page_order = 0;
 		priv->frag_info[0].frag_size = eff_mtu;
 		/* This will gain efficient xdp frame recycling at the
 		 * expense of more costly truesize accounting
@@ -1201,22 +1185,32 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
 		priv->rx_headroom = XDP_PACKET_HEADROOM;
 		i = 1;
 	} else {
-		int buf_size = 0;
+		int frag_size_max = 2048, buf_size = 0;
+
+		/* should not happen, right ? */
+		if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048)
+			frag_size_max = PAGE_SIZE;
 
 		while (buf_size < eff_mtu) {
-			int frag_size = eff_mtu - buf_size;
+			int frag_stride, frag_size = eff_mtu - buf_size;
+			int pad, nb;
 
 			if (i < MLX4_EN_MAX_RX_FRAGS - 1)
-				frag_size = min(frag_size, 2048);
+				frag_size = min(frag_size, frag_size_max);
 
 			priv->frag_info[i].frag_size = frag_size;
+			frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES);
+			/* We can only pack 2 1536-bytes frames in on 4K page
+			 * Therefore, each frame would consume more bytes (truesize)
+			 */
+			nb = PAGE_SIZE / frag_stride;
+			pad = (PAGE_SIZE - nb * frag_stride) / nb;
+			pad &= ~(SMP_CACHE_BYTES - 1);
+			priv->frag_info[i].frag_stride = frag_stride + pad;
 
-			priv->frag_info[i].frag_stride = ALIGN(frag_size,
-							       SMP_CACHE_BYTES);
 			buf_size += frag_size;
 			i++;
 		}
-		priv->rx_page_order = MLX4_EN_ALLOC_PREFER_ORDER;
 		priv->dma_dir = PCI_DMA_FROMDEVICE;
 		priv->rx_headroom = 0;
 	}
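The new frag_stride computation above is worth a worked example. The standalone sketch below assumes PAGE_SIZE = 4096 and SMP_CACHE_BYTES = 64 and uses a local copy of the kernel's ALIGN macro (it is not kernel code); it reproduces the arithmetic: a 1536-byte fragment fits only twice in a 4 KB page, so the 1024 leftover bytes are split between the two fragments and the stride becomes 2048, which is exactly the larger per-frame truesize the commit message warns about.

/* Standalone reproduction of the frag_stride arithmetic from
 * mlx4_en_calc_rx_buf(); the constants and ALIGN macro are assumed,
 * not taken from kernel headers.
 */
#include <stdio.h>

#define PAGE_SIZE	4096
#define SMP_CACHE_BYTES	64
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static int frag_stride(int frag_size)
{
	int stride = ALIGN(frag_size, SMP_CACHE_BYTES);
	int nb = PAGE_SIZE / stride;			/* fragments per page */
	int pad = (PAGE_SIZE - nb * stride) / nb;	/* share the leftover */

	pad &= ~(SMP_CACHE_BYTES - 1);			/* keep cache alignment */
	return stride + pad;
}

int main(void)
{
	/* 1536 -> nb = 2, pad = 512, stride = 2048
	 * 1024 -> nb = 4, pad = 0,   stride = 1024
	 */
	printf("frag_size 1536 -> frag_stride %d\n", frag_stride(1536));
	printf("frag_size 1024 -> frag_stride %d\n", frag_stride(1024));
	return 0;
}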

drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

Lines changed: 0 additions & 5 deletions
@@ -102,9 +102,6 @@
 /* Use the maximum between 16384 and a single page */
 #define MLX4_EN_ALLOC_SIZE	PAGE_ALIGN(16384)
 
-#define MLX4_EN_ALLOC_PREFER_ORDER	min_t(int, get_order(32768),		\
-					      PAGE_ALLOC_COSTLY_ORDER)
-
 #define MLX4_EN_MAX_RX_FRAGS	4
 
 /* Maximum ring sizes */
@@ -256,7 +253,6 @@ struct mlx4_en_rx_alloc {
 	struct page	*page;
 	dma_addr_t	dma;
 	u32		page_offset;
-	u32		page_size;
 };
 
 #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT)
@@ -579,7 +575,6 @@ struct mlx4_en_priv {
 	u8 num_frags;
 	u8 log_rx_info;
 	u8 dma_dir;
-	u8 rx_page_order;
 	u16 rx_headroom;
 
 	struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES];
