
Commit 51151a1

Eric Dumazet authored and davem330 committed
mlx4: allow order-0 memory allocations in RX path
mlx4 exclusively uses order-2 allocations in the RX path, which are likely to fail under memory pressure, so we end up dropping more frames than needed.

This patch tries order-3, order-2, order-1 and finally order-0 allocations to keep good performance, yet allow allocations if/when memory gets fragmented.

By using larger pages, and avoiding unnecessary get_page()/put_page() calls on compound pages, this patch improves performance as well, lowering false sharing on struct page.

Also use GFP_KERNEL allocations in the initialization path, as allocating 12 MB (390 order-3 pages) can easily fail with GFP_ATOMIC.

Signed-off-by: Eric Dumazet <[email protected]>
Cc: Amir Vadai <[email protected]>
Acked-by: Or Gerlitz <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 3bae9db commit 51151a1
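Before the diff, a note on the core of the change. The new allocator walks down from a preferred high order until a block both allocates successfully and can still hold at least one fragment; with 4 KB pages an order-3 block is 32 KB, so the 390 order-3 pages cited above come to roughly 12.2 MB, which is why the initialization path now uses GFP_KERNEL. Below is a minimal sketch of that fallback pattern only; alloc_rx_pages() and its parameters are illustrative names, not part of the patch (the real code is mlx4_alloc_pages() in the diff that follows):

#include <linux/gfp.h>
#include <linux/mm.h>

/* Sketch of the order-fallback pattern; illustrative names only. */
static struct page *alloc_rx_pages(gfp_t base_gfp, int prefer_order,
                                   unsigned int frag_size, int *order_out)
{
        int order;

        for (order = prefer_order; ;) {
                gfp_t gfp = base_gfp;
                struct page *page;

                /* High-order attempts request a compound page and
                 * suppress the failure warning, since a lower order
                 * will be retried anyway.
                 */
                if (order)
                        gfp |= __GFP_COMP | __GFP_NOWARN;
                page = alloc_pages(gfp, order);
                if (page) {
                        *order_out = order;
                        return page;
                }
                /* Step down; give up once even order-0 has failed or
                 * the block could no longer hold a single fragment.
                 */
                if (--order < 0 || (PAGE_SIZE << order) < frag_size)
                        return NULL;
        }
}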

2 files changed: +95, -86 lines

drivers/net/ethernet/mellanox/mlx4/en_rx.c

Lines changed: 89 additions & 80 deletions
@@ -43,40 +43,64 @@
 
 #include "mlx4_en.h"
 
+static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
+			    struct mlx4_en_rx_alloc *page_alloc,
+			    const struct mlx4_en_frag_info *frag_info,
+			    gfp_t _gfp)
+{
+	int order;
+	struct page *page;
+	dma_addr_t dma;
+
+	for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) {
+		gfp_t gfp = _gfp;
+
+		if (order)
+			gfp |= __GFP_COMP | __GFP_NOWARN;
+		page = alloc_pages(gfp, order);
+		if (likely(page))
+			break;
+		if (--order < 0 ||
+		    ((PAGE_SIZE << order) < frag_info->frag_size))
+			return -ENOMEM;
+	}
+	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
+			   PCI_DMA_FROMDEVICE);
+	if (dma_mapping_error(priv->ddev, dma)) {
+		put_page(page);
+		return -ENOMEM;
+	}
+	page_alloc->size = PAGE_SIZE << order;
+	page_alloc->page = page;
+	page_alloc->dma = dma;
+	page_alloc->offset = frag_info->frag_align;
+	/* Not doing get_page() for each frag is a big win
+	 * on asymetric workloads.
+	 */
+	atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride);
+	return 0;
+}
+
 static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 			       struct mlx4_en_rx_desc *rx_desc,
 			       struct mlx4_en_rx_alloc *frags,
-			       struct mlx4_en_rx_alloc *ring_alloc)
+			       struct mlx4_en_rx_alloc *ring_alloc,
+			       gfp_t gfp)
 {
 	struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
-	struct mlx4_en_frag_info *frag_info;
+	const struct mlx4_en_frag_info *frag_info;
 	struct page *page;
 	dma_addr_t dma;
 	int i;
 
 	for (i = 0; i < priv->num_frags; i++) {
 		frag_info = &priv->frag_info[i];
-		if (ring_alloc[i].offset == frag_info->last_offset) {
-			page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
-					   MLX4_EN_ALLOC_ORDER);
-			if (!page)
-				goto out;
-			dma = dma_map_page(priv->ddev, page, 0,
-				MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
-			if (dma_mapping_error(priv->ddev, dma)) {
-				put_page(page);
-				goto out;
-			}
-			page_alloc[i].page = page;
-			page_alloc[i].dma = dma;
-			page_alloc[i].offset = frag_info->frag_align;
-		} else {
-			page_alloc[i].page = ring_alloc[i].page;
-			get_page(ring_alloc[i].page);
-			page_alloc[i].dma = ring_alloc[i].dma;
-			page_alloc[i].offset = ring_alloc[i].offset +
-						frag_info->frag_stride;
-		}
+		page_alloc[i] = ring_alloc[i];
+		page_alloc[i].offset += frag_info->frag_stride;
+		if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size)
+			continue;
+		if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp))
+			goto out;
 	}
 
 	for (i = 0; i < priv->num_frags; i++) {
@@ -88,14 +112,16 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 
 	return 0;
 
-
 out:
 	while (i--) {
 		frag_info = &priv->frag_info[i];
-		if (ring_alloc[i].offset == frag_info->last_offset)
+		if (page_alloc[i].page != ring_alloc[i].page) {
 			dma_unmap_page(priv->ddev, page_alloc[i].dma,
-				MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
-		put_page(page_alloc[i].page);
+				       page_alloc[i].size, PCI_DMA_FROMDEVICE);
+			page = page_alloc[i].page;
+			atomic_set(&page->_count, 1);
+			put_page(page);
+		}
 	}
 	return -ENOMEM;
 }
@@ -104,48 +130,41 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
 			      struct mlx4_en_rx_alloc *frags,
 			      int i)
 {
-	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
+	const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
 
-	if (frags[i].offset == frag_info->last_offset) {
-		dma_unmap_page(priv->ddev, frags[i].dma, MLX4_EN_ALLOC_SIZE,
+	if (frags[i].offset + frag_info->frag_stride > frags[i].size)
+		dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size,
 			       PCI_DMA_FROMDEVICE);
-	}
+
 	if (frags[i].page)
 		put_page(frags[i].page);
 }
 
 static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
 				  struct mlx4_en_rx_ring *ring)
 {
-	struct mlx4_en_rx_alloc *page_alloc;
 	int i;
+	struct mlx4_en_rx_alloc *page_alloc;
 
 	for (i = 0; i < priv->num_frags; i++) {
-		page_alloc = &ring->page_alloc[i];
-		page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
-					       MLX4_EN_ALLOC_ORDER);
-		if (!page_alloc->page)
-			goto out;
+		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
 
-		page_alloc->dma = dma_map_page(priv->ddev, page_alloc->page, 0,
-					MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
-		if (dma_mapping_error(priv->ddev, page_alloc->dma)) {
-			put_page(page_alloc->page);
-			page_alloc->page = NULL;
+		if (mlx4_alloc_pages(priv, &ring->page_alloc[i],
+				     frag_info, GFP_KERNEL))
 			goto out;
-		}
-		page_alloc->offset = priv->frag_info[i].frag_align;
-		en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
-		       i, page_alloc->page);
 	}
 	return 0;
 
 out:
 	while (i--) {
+		struct page *page;
+
 		page_alloc = &ring->page_alloc[i];
 		dma_unmap_page(priv->ddev, page_alloc->dma,
-			MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
-		put_page(page_alloc->page);
+			       page_alloc->size, PCI_DMA_FROMDEVICE);
+		page = page_alloc->page;
+		atomic_set(&page->_count, 1);
+		put_page(page);
 		page_alloc->page = NULL;
 	}
 	return -ENOMEM;
@@ -158,13 +177,18 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
 	int i;
 
 	for (i = 0; i < priv->num_frags; i++) {
+		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
+
 		page_alloc = &ring->page_alloc[i];
 		en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
 		       i, page_count(page_alloc->page));
 
 		dma_unmap_page(priv->ddev, page_alloc->dma,
-				MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
-		put_page(page_alloc->page);
+			       page_alloc->size, PCI_DMA_FROMDEVICE);
+		while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) {
+			put_page(page_alloc->page);
+			page_alloc->offset += frag_info->frag_stride;
+		}
 		page_alloc->page = NULL;
 	}
 }
@@ -195,13 +219,14 @@ static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
 }
 
 static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
-				   struct mlx4_en_rx_ring *ring, int index)
+				   struct mlx4_en_rx_ring *ring, int index,
+				   gfp_t gfp)
 {
 	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
 	struct mlx4_en_rx_alloc *frags = ring->rx_info +
 					(index << priv->log_rx_info);
 
-	return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc);
+	return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
 }
 
 static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
@@ -235,7 +260,8 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
 			ring = &priv->rx_ring[ring_ind];
 
 			if (mlx4_en_prepare_rx_desc(priv, ring,
-						    ring->actual_size)) {
+						    ring->actual_size,
+						    GFP_KERNEL)) {
 				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
 					en_err(priv, "Failed to allocate "
 						     "enough rx buffers\n");
@@ -450,11 +476,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 					DMA_FROM_DEVICE);
 
 		/* Save page reference in skb */
-		get_page(frags[nr].page);
 		__skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);
 		skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size);
 		skb_frags_rx[nr].page_offset = frags[nr].offset;
 		skb->truesize += frag_info->frag_stride;
+		frags[nr].page = NULL;
 	}
 	/* Adjust size of last fragment to match actual length */
 	if (nr > 0)
@@ -547,7 +573,7 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
 	int index = ring->prod & ring->size_mask;
 
 	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
-		if (mlx4_en_prepare_rx_desc(priv, ring, index))
+		if (mlx4_en_prepare_rx_desc(priv, ring, index, GFP_ATOMIC))
 			break;
 		ring->prod++;
 		index = ring->prod & ring->size_mask;
@@ -805,21 +831,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 	return done;
 }
 
-
-/* Calculate the last offset position that accommodates a full fragment
- * (assuming fagment size = stride-align) */
-static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
-{
-	u16 res = MLX4_EN_ALLOC_SIZE % stride;
-	u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;
-
-	en_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
-			  "res:%d offset:%d\n", stride, align, res, offset);
-	return offset;
-}
-
-
-static int frag_sizes[] = {
+static const int frag_sizes[] = {
 	FRAG_SZ0,
 	FRAG_SZ1,
 	FRAG_SZ2,
@@ -847,9 +859,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
 			priv->frag_info[i].frag_stride =
 				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
 		}
-		priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
-						priv, priv->frag_info[i].frag_stride,
-						priv->frag_info[i].frag_align);
 		buf_size += priv->frag_info[i].frag_size;
 		i++;
 	}
@@ -861,13 +870,13 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
 	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
 		  "num_frags:%d):\n", eff_mtu, priv->num_frags);
 	for (i = 0; i < priv->num_frags; i++) {
-		en_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d align:%d "
-				"stride:%d last_offset:%d\n", i,
-				priv->frag_info[i].frag_size,
-				priv->frag_info[i].frag_prefix_size,
-				priv->frag_info[i].frag_align,
-				priv->frag_info[i].frag_stride,
-				priv->frag_info[i].last_offset);
+		en_err(priv,
+		       "  frag:%d - size:%d prefix:%d align:%d stride:%d\n",
+		       i,
+		       priv->frag_info[i].frag_size,
+		       priv->frag_info[i].frag_prefix_size,
+		       priv->frag_info[i].frag_align,
+		       priv->frag_info[i].frag_stride);
 	}
 }

drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

Lines changed: 6 additions & 6 deletions
@@ -96,7 +96,8 @@
 
 /* Use the maximum between 16384 and a single page */
 #define MLX4_EN_ALLOC_SIZE	PAGE_ALIGN(16384)
-#define MLX4_EN_ALLOC_ORDER	get_order(MLX4_EN_ALLOC_SIZE)
+
+#define MLX4_EN_ALLOC_PREFER_ORDER	PAGE_ALLOC_COSTLY_ORDER
 
 /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
  * and 4K allocations) */
@@ -234,9 +235,10 @@ struct mlx4_en_tx_desc {
 #define MLX4_EN_CX3_HIGH_ID	0x1005
 
 struct mlx4_en_rx_alloc {
-	struct page *page;
-	dma_addr_t dma;
-	u16 offset;
+	struct page	*page;
+	dma_addr_t	dma;
+	u32		offset;
+	u32		size;
 };
 
 struct mlx4_en_tx_ring {
@@ -439,8 +441,6 @@ struct mlx4_en_frag_info {
 	u16 frag_prefix_size;
 	u16 frag_stride;
 	u16 frag_align;
-	u16 last_offset;
-
 };
 
 #ifdef CONFIG_MLX4_EN_DCB
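Two header details follow from the new scheme. MLX4_EN_ALLOC_PREFER_ORDER is pinned to PAGE_ALLOC_COSTLY_ORDER (3 on kernels of this era), the threshold above which the page allocator stops trying hard, which makes it a natural ceiling for an opportunistic allocation. The widening of offset to u32 and the new u32 size field plausibly cover 64 KB-page architectures, where an order-3 block no longer fits in a u16 (a reading of the change, not stated in the commit message):

Preferred block size = PAGE_SIZE << MLX4_EN_ALLOC_PREFER_ORDER
   4 KB pages:   4096 << 3 =  32768 bytes  (still fits in a u16)
  64 KB pages:  65536 << 3 = 524288 bytes  (requires a u32)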
