 static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
                             struct mlx4_en_rx_alloc *page_alloc,
                             const struct mlx4_en_frag_info *frag_info,
-                            gfp_t _gfp)
+                            gfp_t gfp)
 {
-       int order;
        struct page *page;
        dma_addr_t dma;
 
-       for (order = priv->rx_page_order; ;) {
-               gfp_t gfp = _gfp;
-
-               if (order)
-                       gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC;
-               page = alloc_pages(gfp, order);
-               if (likely(page))
-                       break;
-               if (--order < 0 ||
-                   ((PAGE_SIZE << order) < frag_info->frag_size))
-                       return -ENOMEM;
-       }
-       dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
-                          priv->dma_dir);
+       page = alloc_page(gfp);
+       if (unlikely(!page))
+               return -ENOMEM;
+       dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
        if (unlikely(dma_mapping_error(priv->ddev, dma))) {
                put_page(page);
                return -ENOMEM;
        }
-       page_alloc->page_size = PAGE_SIZE << order;
        page_alloc->page = page;
        page_alloc->dma = dma;
        page_alloc->page_offset = 0;
        /* Not doing get_page() for each frag is a big win
         * on asymetric workloads. Note we can not use atomic_set().
         */
-       page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1);
+       page_ref_add(page, PAGE_SIZE / frag_info->frag_stride - 1);
        return 0;
 }
 
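The page_ref_add() call above pre-charges the page refcount for every fragment the page will back, instead of issuing one get_page() per fragment. A minimal userspace sketch of the same arithmetic (hypothetical sizes, not driver code; assumes a 4 KB page split into 2 KB strides):

/* Sketch only: pre-charge one page reference per fragment the page
 * will back, minus the reference the allocation itself already holds.
 */
#include <stdio.h>

int main(void)
{
        unsigned int page_size = 4096;     /* assumed PAGE_SIZE */
        unsigned int frag_stride = 2048;   /* example frag_stride */

        unsigned int frags_per_page = page_size / frag_stride;  /* 2 */
        unsigned int extra_refs = frags_per_page - 1;            /* 1 */

        /* Mirrors page_ref_add(page, PAGE_SIZE / frag_stride - 1). */
        printf("fragments per page: %u, extra refs added: %u\n",
               frags_per_page, extra_refs);
        return 0;
}

With a 2048-byte stride the page backs two fragments; since the fresh allocation already holds one reference, a single extra reference covers the second fragment.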
@@ -105,7 +93,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
                page_alloc[i].page_offset += frag_info->frag_stride;
 
                if (page_alloc[i].page_offset + frag_info->frag_stride <=
-                   ring_alloc[i].page_size)
+                   PAGE_SIZE)
                        continue;
 
                if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
@@ -127,11 +115,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
        while (i--) {
                if (page_alloc[i].page != ring_alloc[i].page) {
                        dma_unmap_page(priv->ddev, page_alloc[i].dma,
-                                      page_alloc[i].page_size,
-                                      priv->dma_dir);
+                                      PAGE_SIZE, priv->dma_dir);
                        page = page_alloc[i].page;
                        /* Revert changes done by mlx4_alloc_pages */
-                       page_ref_sub(page, page_alloc[i].page_size /
+                       page_ref_sub(page, PAGE_SIZE /
                                           priv->frag_info[i].frag_stride - 1);
                        put_page(page);
                }
@@ -147,8 +134,8 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
        u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
 
 
-       if (next_frag_end > frags[i].page_size)
-               dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
+       if (next_frag_end > PAGE_SIZE)
+               dma_unmap_page(priv->ddev, frags[i].dma, PAGE_SIZE,
                               priv->dma_dir);
 
        if (frags[i].page)
@@ -168,9 +155,8 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
                                     frag_info, GFP_KERNEL | __GFP_COLD))
                        goto out;
 
-               en_dbg(DRV, priv, "  frag %d allocator: - size:%d frags:%d\n",
-                      i, ring->page_alloc[i].page_size,
-                      page_ref_count(ring->page_alloc[i].page));
+               en_dbg(DRV, priv, "  frag %d allocator: - frags:%d\n",
+                      i, page_ref_count(ring->page_alloc[i].page));
        }
        return 0;
 
@@ -180,11 +166,10 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
 
                page_alloc = &ring->page_alloc[i];
                dma_unmap_page(priv->ddev, page_alloc->dma,
-                              page_alloc->page_size,
-                              priv->dma_dir);
+                              PAGE_SIZE, priv->dma_dir);
                page = page_alloc->page;
                /* Revert changes done by mlx4_alloc_pages */
-               page_ref_sub(page, page_alloc->page_size /
+               page_ref_sub(page, PAGE_SIZE /
                                   priv->frag_info[i].frag_stride - 1);
                put_page(page);
                page_alloc->page = NULL;
@@ -206,9 +191,9 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
                       i, page_count(page_alloc->page));
 
                dma_unmap_page(priv->ddev, page_alloc->dma,
-                              page_alloc->page_size, priv->dma_dir);
+                              PAGE_SIZE, priv->dma_dir);
                while (page_alloc->page_offset + frag_info->frag_stride <
-                      page_alloc->page_size) {
+                      PAGE_SIZE) {
                        put_page(page_alloc->page);
                        page_alloc->page_offset += frag_info->frag_stride;
                }
@@ -1191,7 +1176,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
         * This only works when num_frags == 1.
         */
        if (priv->tx_ring_num[TX_XDP]) {
-               priv->rx_page_order = 0;
                priv->frag_info[0].frag_size = eff_mtu;
                /* This will gain efficient xdp frame recycling at the
                 * expense of more costly truesize accounting
@@ -1201,22 +1185,32 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
                priv->rx_headroom = XDP_PACKET_HEADROOM;
                i = 1;
        } else {
-               int buf_size = 0;
+               int frag_size_max = 2048, buf_size = 0;
+
+               /* should not happen, right ? */
+               if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048)
+                       frag_size_max = PAGE_SIZE;
 
                while (buf_size < eff_mtu) {
-                       int frag_size = eff_mtu - buf_size;
+                       int frag_stride, frag_size = eff_mtu - buf_size;
+                       int pad, nb;
 
                        if (i < MLX4_EN_MAX_RX_FRAGS - 1)
-                               frag_size = min(frag_size, 2048);
+                               frag_size = min(frag_size, frag_size_max);
 
                        priv->frag_info[i].frag_size = frag_size;
+                       frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES);
+                       /* We can only pack 2 1536-bytes frames in a 4K page
+                        * Therefore, each frame would consume more bytes (truesize)
+                        */
+                       nb = PAGE_SIZE / frag_stride;
+                       pad = (PAGE_SIZE - nb * frag_stride) / nb;
+                       pad &= ~(SMP_CACHE_BYTES - 1);
+                       priv->frag_info[i].frag_stride = frag_stride + pad;
 
-                       priv->frag_info[i].frag_stride = ALIGN(frag_size,
-                                                              SMP_CACHE_BYTES);
                        buf_size += frag_size;
                        i++;
                }
-               priv->rx_page_order = MLX4_EN_ALLOC_PREFER_ORDER;
                priv->dma_dir = PCI_DMA_FROMDEVICE;
                priv->rx_headroom = 0;
        }
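The new stride computation spreads a page's leftover bytes across its fragments, so that each frame's truesize reflects the real share of the page it consumes. A minimal userspace sketch of the same arithmetic (hypothetical values, not driver code; assumes a 4 KB PAGE_SIZE and 64-byte SMP_CACHE_BYTES):

/* Sketch only: reproduce the frag_stride + pad computation for a
 * 1536-byte fragment on a hypothetical 4 KB page with 64-byte
 * cache lines.
 */
#include <stdio.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        int page_size = 4096;      /* assumed PAGE_SIZE */
        int cache_bytes = 64;      /* assumed SMP_CACHE_BYTES */
        int frag_size = 1536;      /* example frag_size */

        int frag_stride = ALIGN_UP(frag_size, cache_bytes);  /* 1536 */
        int nb  = page_size / frag_stride;                    /* 2 frags fit */
        int pad = (page_size - nb * frag_stride) / nb;        /* 512 */
        pad &= ~(cache_bytes - 1);                            /* still 512 */

        /* Each fragment is charged 1536 + 512 = 2048 bytes, so two
         * fragments exactly consume the 4 KB page.
         */
        printf("frag_stride + pad = %d\n", frag_stride + pad);
        return 0;
}

For a 1536-byte fragment this yields nb = 2 and pad = 512, so the effective stride becomes 2048 and two frames exactly fill the 4 KB page.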