@@ -87,50 +87,287 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
 	return ERR_PTR(err);
 }
 
+enum {
+	MLX4_MAX_MTT_SHIFT = 31
+};
+
+static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
+					struct mlx4_mtt *mtt,
+					u64 mtt_size, u64 mtt_shift, u64 len,
+					u64 cur_start_addr, u64 *pages,
+					int *start_index, int *npages)
+{
+	u64 cur_end_addr = cur_start_addr + len;
+	u64 cur_end_addr_aligned = 0;
+	u64 mtt_entries;
+	int err = 0;
+	int k;
+
+	len += (cur_start_addr & (mtt_size - 1ULL));
+	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
+	len += (cur_end_addr_aligned - cur_end_addr);
+	if (len & (mtt_size - 1ULL)) {
+		pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
+			len, mtt_size);
+		return -EINVAL;
+	}
+
+	mtt_entries = (len >> mtt_shift);
+
+	/*
+	 * Align the MTT start address to the mtt_size.
+	 * Required to handle cases when the MR starts in the middle of an MTT
+	 * record. Was not required in old code since the physical addresses
+	 * provided by the dma subsystem were page aligned, which was also the
+	 * MTT size.
+	 */
+	cur_start_addr = round_down(cur_start_addr, mtt_size);
+	/* A new block is started ... */
+	for (k = 0; k < mtt_entries; ++k) {
+		pages[*npages] = cur_start_addr + (mtt_size * k);
+		(*npages)++;
+		/*
+		 * Be friendly to mlx4_write_mtt() and pass it chunks of
+		 * appropriate size.
+		 */
+		if (*npages == PAGE_SIZE / sizeof(u64)) {
+			err = mlx4_write_mtt(dev->dev, mtt, *start_index,
+					     *npages, pages);
+			if (err)
+				return err;
+
+			(*start_index) += *npages;
+			*npages = 0;
+		}
+	}
+
+	return 0;
+}
+
+static inline u64 alignment_of(u64 ptr)
+{
+	return ilog2(ptr & (~(ptr - 1)));
+}
+
+static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
+				       u64 current_block_end,
+				       u64 block_shift)
+{
+	/* Check whether the alignment of the new block is aligned as well as
+	 * the previous block.
+	 * Block address must start with zeros till size of entity_size.
+	 */
+	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
+		/*
+		 * It is not as well aligned as the previous block - reduce the
+		 * mtt size accordingly. Here we take the last right bit which
+		 * is 1.
+		 */
+		block_shift = alignment_of(next_block_start);
+
+	/*
+	 * Check whether the alignment of the end of previous block - is it
+	 * aligned as well as the start of the block
+	 */
+	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
+		/*
+		 * It is not as well aligned as the start of the block -
+		 * reduce the mtt size accordingly.
+		 */
+		block_shift = alignment_of(current_block_end);
+
+	return block_shift;
+}
+
 int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
 			   struct ib_umem *umem)
 {
 	u64 *pages;
-	int i, k, entry;
-	int n;
-	int len;
+	u64 len = 0;
 	int err = 0;
+	u64 mtt_size;
+	u64 cur_start_addr = 0;
+	u64 mtt_shift;
+	int start_index = 0;
+	int npages = 0;
 	struct scatterlist *sg;
+	int i;
 
 	pages = (u64 *) __get_free_page(GFP_KERNEL);
 	if (!pages)
 		return -ENOMEM;
 
-	i = n = 0;
+	mtt_shift = mtt->page_shift;
+	mtt_size = 1ULL << mtt_shift;
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> mtt->page_shift;
-		for (k = 0; k < len; ++k) {
-			pages[i++] = sg_dma_address(sg) +
-				(k << umem->page_shift);
-			/*
-			 * Be friendly to mlx4_write_mtt() and
-			 * pass it chunks of appropriate size.
-			 */
-			if (i == PAGE_SIZE / sizeof(u64)) {
-				err = mlx4_write_mtt(dev->dev, mtt, n,
-						     i, pages);
-				if (err)
-					goto out;
-				n += i;
-				i = 0;
-			}
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+		if (cur_start_addr + len == sg_dma_address(sg)) {
+			/* still the same block */
+			len += sg_dma_len(sg);
+			continue;
 		}
+		/*
+		 * A new block is started ...
+		 * If len is malaligned, write an extra mtt entry to cover the
+		 * misaligned area (round up the division)
+		 */
+		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
+						   mtt_shift, len,
+						   cur_start_addr,
+						   pages, &start_index,
+						   &npages);
+		if (err)
+			goto out;
+
+		cur_start_addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
 	}
 
-	if (i)
-		err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
+	/* Handle the last block */
+	if (len > 0) {
+		/*
+		 * If len is malaligned, write an extra mtt entry to cover
+		 * the misaligned area (round up the division)
+		 */
+		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
+						   mtt_shift, len,
+						   cur_start_addr, pages,
+						   &start_index, &npages);
+		if (err)
+			goto out;
+	}
+
+	if (npages)
+		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
 
 out:
 	free_page((unsigned long) pages);
 	return err;
 }
 
+/*
+ * Calculate optimal mtt size based on contiguous pages.
+ * Function will return also the number of pages that are not aligned to the
+ * calculated mtt_size to be added to total number of pages. For that we should
+ * check the first chunk length & last chunk length and if not aligned to
+ * mtt_size we should increment the non_aligned_pages number. All chunks in the
+ * middle already handled as part of mtt shift calculation for both their start
+ * & end addresses.
+ */
+static int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+					      u64 start_va,
+					      int *num_of_mtts)
+{
+	u64 block_shift = MLX4_MAX_MTT_SHIFT;
+	u64 min_shift = umem->page_shift;
+	u64 last_block_aligned_end = 0;
+	u64 current_block_start = 0;
+	u64 first_block_start = 0;
+	u64 current_block_len = 0;
+	u64 last_block_end = 0;
+	struct scatterlist *sg;
+	u64 current_block_end;
+	u64 misalignment_bits;
+	u64 next_block_start;
+	u64 total_len = 0;
+	int i;
+
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+		/*
+		 * Initialization - save the first chunk start as the
+		 * current_block_start - block means contiguous pages.
+		 */
+		if (current_block_len == 0 && current_block_start == 0) {
+			current_block_start = sg_dma_address(sg);
+			first_block_start = current_block_start;
+			/*
+			 * Find the bits that are different between the physical
+			 * address and the virtual address for the start of the
+			 * MR.
+			 * umem_get aligned the start_va to a page boundary.
+			 * Therefore, we need to align the start va to the same
+			 * boundary.
+			 * misalignment_bits is needed to handle the case of a
+			 * single memory region. In this case, the rest of the
+			 * logic will not reduce the block size. If we use a
+			 * block size which is bigger than the alignment of the
+			 * misalignment bits, we might use the virtual page
+			 * number instead of the physical page number, resulting
+			 * in access to the wrong data.
+			 */
+			misalignment_bits =
+			(start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL)))
+			^ current_block_start;
+			block_shift = min(alignment_of(misalignment_bits),
+					  block_shift);
+		}
+
+		/*
+		 * Go over the scatter entries and check if they continue the
+		 * previous scatter entry.
+		 */
+		next_block_start = sg_dma_address(sg);
+		current_block_end = current_block_start + current_block_len;
+		/* If we have a split (non-contig.) between two blocks */
+		if (current_block_end != next_block_start) {
+			block_shift = mlx4_ib_umem_calc_block_mtt
+					(next_block_start,
+					 current_block_end,
+					 block_shift);
+
+			/*
+			 * If we reached the minimum shift for 4k page we stop
+			 * the loop.
+			 */
+			if (block_shift <= min_shift)
+				goto end;
+
+			/*
+			 * If not saved yet we are in first block - we save the
+			 * length of first block to calculate the
+			 * non_aligned_pages number at the end.
+			 */
+			total_len += current_block_len;
+
+			/* Start a new block */
+			current_block_start = next_block_start;
+			current_block_len = sg_dma_len(sg);
+			continue;
+		}
+		/* The scatter entry is another part of the current block,
+		 * increase the block size.
+		 * An entry in the scatter can be larger than 4k (page) as of
+		 * dma mapping which merge some blocks together.
+		 */
+		current_block_len += sg_dma_len(sg);
+	}
+
+	/* Account for the last block in the total len */
+	total_len += current_block_len;
+	/* Add to the first block the misalignment that it suffers from. */
+	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
+	last_block_end = current_block_start + current_block_len;
+	last_block_aligned_end = round_up(last_block_end, 1 << block_shift);
+	total_len += (last_block_aligned_end - last_block_end);
+
+	if (total_len & ((1ULL << block_shift) - 1ULL))
+		pr_warn("misaligned total length detected (%llu, %llu)!",
+			total_len, block_shift);
+
+	*num_of_mtts = total_len >> block_shift;
+end:
+	if (block_shift < min_shift) {
+		/*
+		 * If shift is less than the min we set a warning and return the
+		 * min shift.
+		 */
+		pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift);
+
+		block_shift = min_shift;
+	}
+	return block_shift;
+}
+
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata)
@@ -155,7 +392,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	}
 
 	n = ib_umem_page_count(mr->umem);
-	shift = mr->umem->page_shift;
+	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
 
 	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
 			    convert_access(access_flags), n, shift, &mr->mmr);
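As an aside, the helpers added in the first hunk shrink the MTT block shift to the alignment of the least-aligned block boundary they see. The minimal standalone userspace sketch below (not part of the patch; the DMA addresses are hypothetical and the kernel's ilog2() is replaced by a compiler builtin) mirrors alignment_of() and mlx4_ib_umem_calc_block_mtt() to show that behaviour:

/*
 * Illustrative sketch only - mirrors the patch's alignment_of() and
 * mlx4_ib_umem_calc_block_mtt() logic in userspace. Build with gcc/clang.
 */
#include <stdint.h>
#include <stdio.h>

/* Index of the lowest set bit == largest power-of-two alignment of ptr. */
static uint64_t alignment_of(uint64_t ptr)
{
	return (uint64_t)__builtin_ctzll(ptr & (~(ptr - 1)));
}

/* Reduce block_shift if either boundary is less aligned than 1 << block_shift. */
static uint64_t calc_block_mtt(uint64_t next_block_start,
			       uint64_t current_block_end,
			       uint64_t block_shift)
{
	if (next_block_start & ((1ULL << block_shift) - 1ULL))
		block_shift = alignment_of(next_block_start);
	if (current_block_end & ((1ULL << block_shift) - 1ULL))
		block_shift = alignment_of(current_block_end);
	return block_shift;
}

int main(void)
{
	uint64_t shift = 31;	/* start from the maximum, as the patch does */

	/* Current block ends at 0x10004000, next block starts at 0x20008000:
	 * the end is only 16 KB aligned, so the shift drops from 31 to 14. */
	shift = calc_block_mtt(0x20008000ULL, 0x10004000ULL, shift);
	printf("block shift after split: %llu\n", (unsigned long long)shift);
	return 0;
}

Compiled and run, this prints a block shift of 14, i.e. 16 KB MTT entries for this layout instead of falling all the way back to the 4 KB page shift.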