Skip to content

Commit 7e07efb

Browse files
committed
rbd: move from raw pages to bvec data descriptors
In preparation for rbd "fancy" striping which requires bio_vec arrays, wire up BVECS data type and kill off PAGES data type. There is nothing wrong with using page vectors for copyup requests -- it's just less iterator boilerplate code to write for the new striping framework.

Signed-off-by: Ilya Dryomov <[email protected]>
Reviewed-by: Alex Elder <[email protected]>
1 parent b9e281c commit 7e07efb

File tree

1 file changed

+77
-78
lines changed

1 file changed

+77
-78
lines changed

drivers/block/rbd.c

Lines changed: 77 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
219219
enum obj_request_type {
220220
OBJ_REQUEST_NODATA = 1,
221221
OBJ_REQUEST_BIO, /* pointer into provided bio (list) */
222-
OBJ_REQUEST_PAGES,
222+
OBJ_REQUEST_BVECS, /* pointer into provided bio_vec array */
223223
};
224224

225225
enum obj_operation_type {
@@ -272,12 +272,12 @@ struct rbd_obj_request {
272272
union {
273273
struct ceph_bio_iter bio_pos;
274274
struct {
275-
struct page **pages;
276-
u32 page_count;
275+
struct ceph_bvec_iter bvec_pos;
276+
u32 bvec_count;
277277
};
278278
};
279-
struct page **copyup_pages;
280-
u32 copyup_page_count;
279+
struct bio_vec *copyup_bvecs;
280+
u32 copyup_bvec_count;
281281

282282
struct ceph_osd_request *osd_req;
283283

@@ -1272,36 +1272,14 @@ static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes)
12721272
}));
12731273
}
12741274

1275-
/*
1276-
* similar to zero_bio_chain(), zeros data defined by a page array,
1277-
* starting at the given byte offset from the start of the array and
1278-
* continuing up to the given end offset. The pages array is
1279-
* assumed to be big enough to hold all bytes up to the end.
1280-
*/
1281-
static void zero_pages(struct page **pages, u64 offset, u64 end)
1275+
static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
12821276
{
1283-
struct page **page = &pages[offset >> PAGE_SHIFT];
1284-
1285-
rbd_assert(end > offset);
1286-
rbd_assert(end - offset <= (u64)SIZE_MAX);
1287-
while (offset < end) {
1288-
size_t page_offset;
1289-
size_t length;
1290-
unsigned long flags;
1291-
void *kaddr;
1292-
1293-
page_offset = offset & ~PAGE_MASK;
1294-
length = min_t(size_t, PAGE_SIZE - page_offset, end - offset);
1295-
local_irq_save(flags);
1296-
kaddr = kmap_atomic(*page);
1297-
memset(kaddr + page_offset, 0, length);
1298-
flush_dcache_page(*page);
1299-
kunmap_atomic(kaddr);
1300-
local_irq_restore(flags);
1277+
struct ceph_bvec_iter it = *bvec_pos;
13011278

1302-
offset += length;
1303-
page++;
1304-
}
1279+
ceph_bvec_iter_advance(&it, off);
1280+
ceph_bvec_iter_advance_step(&it, bytes, ({
1281+
zero_bvec(&bv);
1282+
}));
13051283
}
13061284

13071285
/*
@@ -1461,7 +1439,7 @@ static bool obj_request_type_valid(enum obj_request_type type)
14611439
switch (type) {
14621440
case OBJ_REQUEST_NODATA:
14631441
case OBJ_REQUEST_BIO:
1464-
case OBJ_REQUEST_PAGES:
1442+
case OBJ_REQUEST_BVECS:
14651443
return true;
14661444
default:
14671445
return false;
@@ -1611,14 +1589,15 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
16111589
if (obj_request->type == OBJ_REQUEST_BIO)
16121590
zero_bios(&obj_request->bio_pos, 0, length);
16131591
else
1614-
zero_pages(obj_request->pages, 0, length);
1592+
zero_bvecs(&obj_request->bvec_pos, 0, length);
16151593
obj_request->result = 0;
16161594
} else if (xferred < length && !obj_request->result) {
16171595
if (obj_request->type == OBJ_REQUEST_BIO)
16181596
zero_bios(&obj_request->bio_pos, xferred,
16191597
length - xferred);
16201598
else
1621-
zero_pages(obj_request->pages, xferred, length);
1599+
zero_bvecs(&obj_request->bvec_pos, xferred,
1600+
length - xferred);
16221601
}
16231602
obj_request->xferred = length;
16241603
obj_request_done_set(obj_request);
@@ -1913,6 +1892,7 @@ rbd_obj_request_create(enum obj_request_type type)
19131892
static void rbd_obj_request_destroy(struct kref *kref)
19141893
{
19151894
struct rbd_obj_request *obj_request;
1895+
u32 i;
19161896

19171897
obj_request = container_of(kref, struct rbd_obj_request, kref);
19181898

@@ -1924,22 +1904,22 @@ static void rbd_obj_request_destroy(struct kref *kref)
19241904
if (obj_request->osd_req)
19251905
rbd_osd_req_destroy(obj_request->osd_req);
19261906

1927-
rbd_assert(obj_request_type_valid(obj_request->type));
19281907
switch (obj_request->type) {
19291908
case OBJ_REQUEST_NODATA:
19301909
case OBJ_REQUEST_BIO:
1910+
case OBJ_REQUEST_BVECS:
19311911
break; /* Nothing to do */
1932-
case OBJ_REQUEST_PAGES:
1933-
/* img_data requests don't own their page array */
1934-
if (obj_request->pages &&
1935-
!obj_request_img_data_test(obj_request))
1936-
ceph_release_page_vector(obj_request->pages,
1937-
obj_request->page_count);
1938-
break;
1912+
default:
1913+
rbd_assert(0);
19391914
}
19401915

1941-
ceph_release_page_vector(obj_request->copyup_pages,
1942-
obj_request->copyup_page_count);
1916+
if (obj_request->copyup_bvecs) {
1917+
for (i = 0; i < obj_request->copyup_bvec_count; i++) {
1918+
if (obj_request->copyup_bvecs[i].bv_page)
1919+
__free_page(obj_request->copyup_bvecs[i].bv_page);
1920+
}
1921+
kfree(obj_request->copyup_bvecs);
1922+
}
19431923

19441924
kmem_cache_free(rbd_obj_request_cache, obj_request);
19451925
}
@@ -2260,10 +2240,9 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
22602240
if (obj_request->type == OBJ_REQUEST_BIO)
22612241
osd_req_op_extent_osd_data_bio(osd_request, num_ops,
22622242
&obj_request->bio_pos, length);
2263-
else if (obj_request->type == OBJ_REQUEST_PAGES)
2264-
osd_req_op_extent_osd_data_pages(osd_request, num_ops,
2265-
obj_request->pages, length,
2266-
offset & ~PAGE_MASK, false, false);
2243+
else if (obj_request->type == OBJ_REQUEST_BVECS)
2244+
osd_req_op_extent_osd_data_bvec_pos(osd_request, num_ops,
2245+
&obj_request->bvec_pos);
22672246

22682247
/* Discards are also writes */
22692248
if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
@@ -2288,7 +2267,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
22882267
struct rbd_obj_request *obj_request = NULL;
22892268
struct rbd_obj_request *next_obj_request;
22902269
struct ceph_bio_iter bio_it;
2291-
struct page **pages = NULL;
2270+
struct ceph_bvec_iter bvec_it;
22922271
enum obj_operation_type op_type;
22932272
u64 img_offset;
22942273
u64 resid;
@@ -2305,8 +2284,8 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
23052284
bio_it = *(struct ceph_bio_iter *)data_desc;
23062285
rbd_assert(img_offset ==
23072286
bio_it.iter.bi_sector << SECTOR_SHIFT);
2308-
} else if (type == OBJ_REQUEST_PAGES) {
2309-
pages = data_desc;
2287+
} else if (type == OBJ_REQUEST_BVECS) {
2288+
bvec_it = *(struct ceph_bvec_iter *)data_desc;
23102289
}
23112290

23122291
while (resid) {
@@ -2332,15 +2311,10 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
23322311
if (type == OBJ_REQUEST_BIO) {
23332312
obj_request->bio_pos = bio_it;
23342313
ceph_bio_iter_advance(&bio_it, length);
2335-
} else if (type == OBJ_REQUEST_PAGES) {
2336-
unsigned int page_count;
2337-
2338-
obj_request->pages = pages;
2339-
page_count = (u32)calc_pages_for(offset, length);
2340-
obj_request->page_count = page_count;
2341-
if ((offset + length) & ~PAGE_MASK)
2342-
page_count--; /* more on last page */
2343-
pages += page_count;
2314+
} else if (type == OBJ_REQUEST_BVECS) {
2315+
obj_request->bvec_pos = bvec_it;
2316+
ceph_bvec_iter_shorten(&obj_request->bvec_pos, length);
2317+
ceph_bvec_iter_advance(&bvec_it, length);
23442318
}
23452319

23462320
osd_req = rbd_osd_req_create(rbd_dev, op_type,
@@ -2452,8 +2426,8 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
24522426
/* Initialize the copyup op */
24532427

24542428
osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
2455-
osd_req_op_cls_request_data_pages(osd_req, 0, orig_request->copyup_pages,
2456-
parent_length, 0, false, false);
2429+
osd_req_op_cls_request_data_bvecs(osd_req, 0, orig_request->copyup_bvecs,
2430+
parent_length);
24572431

24582432
/* Add the other op(s) */
24592433

@@ -2469,6 +2443,8 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
24692443
rbd_obj_request_error(orig_request, img_result);
24702444
}
24712445

2446+
static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap);
2447+
24722448
/*
24732449
* Read from the parent image the range of data that covers the
24742450
* entire target of the given object request. This is used for
@@ -2487,10 +2463,9 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
24872463
{
24882464
struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
24892465
struct rbd_img_request *parent_request = NULL;
2466+
struct ceph_bvec_iter bvec_it = { 0 };
24902467
u64 img_offset;
24912468
u64 length;
2492-
struct page **pages = NULL;
2493-
u32 page_count;
24942469
int result;
24952470

24962471
rbd_assert(rbd_dev->parent != NULL);
@@ -2516,24 +2491,20 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
25162491
* Allocate a bio_vec array (one page per entry) big enough to receive the data read
25172492
* from the parent.
25182493
*/
2519-
page_count = (u32)calc_pages_for(0, length);
2520-
pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
2521-
if (IS_ERR(pages)) {
2522-
result = PTR_ERR(pages);
2494+
result = setup_copyup_bvecs(obj_request, length);
2495+
if (result)
25232496
goto out_err;
2524-
}
2525-
2526-
rbd_assert(!obj_request->copyup_pages);
2527-
obj_request->copyup_pages = pages;
2528-
obj_request->copyup_page_count = page_count;
25292497

25302498
result = -ENOMEM;
25312499
parent_request = rbd_parent_request_create(obj_request,
25322500
img_offset, length);
25332501
if (!parent_request)
25342502
goto out_err;
25352503

2536-
result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
2504+
bvec_it.bvecs = obj_request->copyup_bvecs;
2505+
bvec_it.iter.bi_size = length;
2506+
result = rbd_img_request_fill(parent_request, OBJ_REQUEST_BVECS,
2507+
&bvec_it);
25372508
if (result)
25382509
goto out_err;
25392510

@@ -2751,6 +2722,34 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request)
27512722
return ret;
27522723
}
27532724

2725+
static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
2726+
{
2727+
u32 i;
2728+
2729+
rbd_assert(!obj_req->copyup_bvecs);
2730+
obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap);
2731+
obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count,
2732+
sizeof(*obj_req->copyup_bvecs),
2733+
GFP_NOIO);
2734+
if (!obj_req->copyup_bvecs)
2735+
return -ENOMEM;
2736+
2737+
for (i = 0; i < obj_req->copyup_bvec_count; i++) {
2738+
unsigned int len = min(obj_overlap, (u64)PAGE_SIZE);
2739+
2740+
obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO);
2741+
if (!obj_req->copyup_bvecs[i].bv_page)
2742+
return -ENOMEM;
2743+
2744+
obj_req->copyup_bvecs[i].bv_offset = 0;
2745+
obj_req->copyup_bvecs[i].bv_len = len;
2746+
obj_overlap -= len;
2747+
}
2748+
2749+
rbd_assert(!obj_overlap);
2750+
return 0;
2751+
}
2752+
27542753
static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
27552754
{
27562755
struct rbd_obj_request *obj_request;
@@ -2832,8 +2831,8 @@ static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
28322831
result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
28332832
&obj_request->bio_pos);
28342833
else
2835-
result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
2836-
obj_request->pages);
2834+
result = rbd_img_request_fill(img_request, OBJ_REQUEST_BVECS,
2835+
&obj_request->bvec_pos);
28372836
if (result)
28382837
goto out_err;
28392838

0 commit comments

Comments
 (0)