Skip to content

Commit f198274

Browse files
dhowells authored and brauner committed
iov_iter: Convert iterate*() to inline funcs
Convert the iov_iter iteration macros to inline functions to make the code easier to follow. The functions are marked __always_inline as we don't want to end up with indirect calls in the code. This, however, leaves dealing with ->copy_mc in an awkward situation since the step function (memcpy_from_iter_mc()) needs to test the flag in the iterator, but isn't passed the iterator. This will be dealt with in a follow-up patch. The variable names in the per-type iterator functions have been harmonised as much as possible and made clearer as to the variable purpose. The iterator functions are also moved to a header file so that other operations that need to scan over an iterator can be added. For instance, the rbd driver could use this to scan a buffer to see if it is all zeros and libceph could use this to generate a crc. Signed-off-by: David Howells <[email protected]> Link: https://lore.kernel.org/r/[email protected]/ # v1 Link: https://lore.kernel.org/r/[email protected]/ # v2 Link: https://lore.kernel.org/r/[email protected]/ # v3 Link: https://lore.kernel.org/r/[email protected] cc: Alexander Viro <[email protected]> cc: Jens Axboe <[email protected]> cc: Christoph Hellwig <[email protected]> cc: Christian Brauner <[email protected]> cc: Matthew Wilcox <[email protected]> cc: Linus Torvalds <[email protected]> cc: David Laight <[email protected]> cc: [email protected] cc: [email protected] cc: [email protected] Signed-off-by: Christian Brauner <[email protected]>
1 parent f1b4cb6 commit f198274

File tree

2 files changed

+449
-241
lines changed

2 files changed

+449
-241
lines changed

include/linux/iov_iter.h

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later */
2+
/* I/O iterator iteration building functions.
3+
*
4+
* Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
5+
* Written by David Howells ([email protected])
6+
*/
7+
8+
#ifndef _LINUX_IOV_ITER_H
9+
#define _LINUX_IOV_ITER_H
10+
11+
#include <linux/uio.h>
12+
#include <linux/bvec.h>
13+
14+
/*
 * Step function applied to one segment of a kernel-address iterator (used by
 * the KVEC, BVEC and XARRAY handlers in this file).  @iter_base is the address
 * of the segment, @len is its length and @progress is the amount of data
 * already iterated over before this segment; @priv and @priv2 are passed
 * through unchanged from the caller.  The return value is the amount of the
 * segment that was NOT processed (0 means the whole segment was handled).
 */
typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len,
15+
void *priv, void *priv2);
16+
/*
 * Same contract as iov_step_f, but used by the UBUF and IOVEC handlers in this
 * file: @iter_base is a __user address rather than a kernel address.
 */
typedef size_t (*iov_ustep_f)(void __user *iter_base, size_t progress, size_t len,
17+
void *priv, void *priv2);
18+
19+
/*
20+
* Handle ITER_UBUF.
21+
*/
22+
static __always_inline
23+
size_t iterate_ubuf(struct iov_iter *iter, size_t len, void *priv, void *priv2,
24+
iov_ustep_f step)
25+
{
26+
void __user *base = iter->ubuf;
27+
size_t progress = 0, remain;
28+
29+
remain = step(base + iter->iov_offset, 0, len, priv, priv2);
30+
progress = len - remain;
31+
iter->iov_offset += progress;
32+
iter->count -= progress;
33+
return progress;
34+
}
35+
36+
/*
37+
* Handle ITER_IOVEC.
38+
*/
39+
static __always_inline
40+
size_t iterate_iovec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
41+
iov_ustep_f step)
42+
{
43+
const struct iovec *p = iter->__iov;
44+
size_t progress = 0, skip = iter->iov_offset;
45+
46+
do {
47+
size_t remain, consumed;
48+
size_t part = min(len, p->iov_len - skip);
49+
50+
if (likely(part)) {
51+
remain = step(p->iov_base + skip, progress, part, priv, priv2);
52+
consumed = part - remain;
53+
progress += consumed;
54+
skip += consumed;
55+
len -= consumed;
56+
if (skip < p->iov_len)
57+
break;
58+
}
59+
p++;
60+
skip = 0;
61+
} while (len);
62+
63+
iter->nr_segs -= p - iter->__iov;
64+
iter->__iov = p;
65+
iter->iov_offset = skip;
66+
iter->count -= progress;
67+
return progress;
68+
}
69+
70+
/*
71+
* Handle ITER_KVEC.
72+
*/
73+
static __always_inline
74+
size_t iterate_kvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
75+
iov_step_f step)
76+
{
77+
const struct kvec *p = iter->kvec;
78+
size_t progress = 0, skip = iter->iov_offset;
79+
80+
do {
81+
size_t remain, consumed;
82+
size_t part = min(len, p->iov_len - skip);
83+
84+
if (likely(part)) {
85+
remain = step(p->iov_base + skip, progress, part, priv, priv2);
86+
consumed = part - remain;
87+
progress += consumed;
88+
skip += consumed;
89+
len -= consumed;
90+
if (skip < p->iov_len)
91+
break;
92+
}
93+
p++;
94+
skip = 0;
95+
} while (len);
96+
97+
iter->nr_segs -= p - iter->kvec;
98+
iter->kvec = p;
99+
iter->iov_offset = skip;
100+
iter->count -= progress;
101+
return progress;
102+
}
103+
104+
/*
105+
* Handle ITER_BVEC.
106+
*/
107+
static __always_inline
108+
size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2,
109+
iov_step_f step)
110+
{
111+
const struct bio_vec *p = iter->bvec;
112+
size_t progress = 0, skip = iter->iov_offset;
113+
114+
do {
115+
size_t remain, consumed;
116+
size_t offset = p->bv_offset + skip, part;
117+
void *kaddr = kmap_local_page(p->bv_page + offset / PAGE_SIZE);
118+
119+
part = min3(len,
120+
(size_t)(p->bv_len - skip),
121+
(size_t)(PAGE_SIZE - offset % PAGE_SIZE));
122+
remain = step(kaddr + offset % PAGE_SIZE, progress, part, priv, priv2);
123+
kunmap_local(kaddr);
124+
consumed = part - remain;
125+
len -= consumed;
126+
progress += consumed;
127+
skip += consumed;
128+
if (skip >= p->bv_len) {
129+
skip = 0;
130+
p++;
131+
}
132+
if (remain)
133+
break;
134+
} while (len);
135+
136+
iter->nr_segs -= p - iter->bvec;
137+
iter->bvec = p;
138+
iter->iov_offset = skip;
139+
iter->count -= progress;
140+
return progress;
141+
}
142+
143+
/*
144+
* Handle ITER_XARRAY.
145+
*/
146+
static __always_inline
147+
size_t iterate_xarray(struct iov_iter *iter, size_t len, void *priv, void *priv2,
148+
iov_step_f step)
149+
{
150+
struct folio *folio;
151+
size_t progress = 0;
152+
loff_t start = iter->xarray_start + iter->iov_offset;
153+
pgoff_t index = start / PAGE_SIZE;
154+
XA_STATE(xas, iter->xarray, index);
155+
156+
rcu_read_lock();
157+
xas_for_each(&xas, folio, ULONG_MAX) {
158+
size_t remain, consumed, offset, part, flen;
159+
160+
if (xas_retry(&xas, folio))
161+
continue;
162+
if (WARN_ON(xa_is_value(folio)))
163+
break;
164+
if (WARN_ON(folio_test_hugetlb(folio)))
165+
break;
166+
167+
offset = offset_in_folio(folio, start + progress);
168+
flen = min(folio_size(folio) - offset, len);
169+
170+
while (flen) {
171+
void *base = kmap_local_folio(folio, offset);
172+
173+
part = min_t(size_t, flen,
174+
PAGE_SIZE - offset_in_page(offset));
175+
remain = step(base, progress, part, priv, priv2);
176+
kunmap_local(base);
177+
178+
consumed = part - remain;
179+
progress += consumed;
180+
len -= consumed;
181+
182+
if (remain || len == 0)
183+
goto out;
184+
flen -= consumed;
185+
offset += consumed;
186+
}
187+
}
188+
189+
out:
190+
rcu_read_unlock();
191+
iter->iov_offset += progress;
192+
iter->count -= progress;
193+
return progress;
194+
}
195+
196+
/*
197+
* Handle ITER_DISCARD.
198+
*/
199+
static __always_inline
200+
size_t iterate_discard(struct iov_iter *iter, size_t len, void *priv, void *priv2,
201+
iov_step_f step)
202+
{
203+
size_t progress = len;
204+
205+
iter->count -= progress;
206+
return progress;
207+
}
208+
209+
/**
210+
* iterate_and_advance2 - Iterate over an iterator
211+
* @iter: The iterator to iterate over.
212+
* @len: The amount to iterate over.
213+
* @priv: Data for the step functions.
214+
* @priv2: More data for the step functions.
215+
* @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
216+
* @step: Function for other iterators; given kernel addresses.
217+
*
218+
* Iterate over the next part of an iterator, up to the specified length. The
219+
* buffer is presented in segments, which for kernel iteration are broken up by
220+
* physical pages and mapped, with the mapped address being presented.
221+
*
222+
* Two step functions, @step and @ustep, must be provided, one for handling
223+
* mapped kernel addresses and the other is given user addresses which have the
224+
* potential to fault since no pinning is performed.
225+
*
226+
* The step functions are passed the address and length of the segment, @priv,
227+
* @priv2 and the amount of data so far iterated over (which can, for example,
228+
* be added to @priv to point to the right part of a second buffer). The step
229+
* functions should return the amount of the segment they didn't process (ie. 0
230+
* indicates complete processsing).
231+
*
232+
* This function returns the amount of data processed (ie. 0 means nothing was
233+
* processed and the value of @len means processes to completion).
234+
*/
235+
static __always_inline
236+
size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv,
237+
void *priv2, iov_ustep_f ustep, iov_step_f step)
238+
{
239+
if (unlikely(iter->count < len))
240+
len = iter->count;
241+
if (unlikely(!len))
242+
return 0;
243+
244+
if (likely(iter_is_ubuf(iter)))
245+
return iterate_ubuf(iter, len, priv, priv2, ustep);
246+
if (likely(iter_is_iovec(iter)))
247+
return iterate_iovec(iter, len, priv, priv2, ustep);
248+
if (iov_iter_is_bvec(iter))
249+
return iterate_bvec(iter, len, priv, priv2, step);
250+
if (iov_iter_is_kvec(iter))
251+
return iterate_kvec(iter, len, priv, priv2, step);
252+
if (iov_iter_is_xarray(iter))
253+
return iterate_xarray(iter, len, priv, priv2, step);
254+
return iterate_discard(iter, len, priv, priv2, step);
255+
}
256+
257+
/**
258+
* iterate_and_advance - Iterate over an iterator
259+
* @iter: The iterator to iterate over.
260+
* @len: The amount to iterate over.
261+
* @priv: Data for the step functions.
262+
* @ustep: Function for UBUF/IOVEC iterators; given __user addresses.
263+
* @step: Function for other iterators; given kernel addresses.
264+
*
265+
* As iterate_and_advance2(), but priv2 is always NULL.
266+
*/
267+
static __always_inline
268+
size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv,
269+
iov_ustep_f ustep, iov_step_f step)
270+
{
271+
return iterate_and_advance2(iter, len, priv, NULL, ustep, step);
272+
}
273+
274+
#endif /* _LINUX_IOV_ITER_H */

0 commit comments

Comments
 (0)