Skip to content

Commit ea6e04f

Browse files
SantoshShilimkarLinuxMinion
authored and committed
RDS: make congestion code independent of PAGE_SIZE
The RDS congestion map code is designed with the base assumption of a 4K page size. The map update code as well as the transport code assume it that way. Of course, it breaks when a transport like IB starts supporting fragments larger than 4K. To overcome this limitation without too many changes to the core congestion map update logic, define an independent RDS_CONG_PAGE_SIZE and use it. While at it, we also move rds_message_map_pages(), whose sole purpose is to map congestion pages, into the congestion code (as rds_cong_map_pages()). Orabug: 21894138 Reviewed-by: Wei Lin Guay <[email protected]> Signed-off-by: Santosh Shilimkar <[email protected]>
1 parent 0355b88 commit ea6e04f

File tree

4 files changed

+40
-42
lines changed

4 files changed

+40
-42
lines changed

net/rds/cong.c

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
138138
unsigned long zp;
139139
unsigned long i;
140140
unsigned long flags;
141+
gfp_t mask = GFP_KERNEL | __GFP_ZERO;
141142

142143
map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
143144
if (!map)
@@ -147,12 +148,12 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
147148
init_waitqueue_head(&map->m_waitq);
148149
INIT_LIST_HEAD(&map->m_conn_list);
149150

150-
for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
151-
zp = get_zeroed_page(GFP_KERNEL);
152-
if (zp == 0)
153-
goto out;
154-
map->m_page_addrs[i] = zp;
155-
}
151+
zp = __get_free_pages(mask, get_order(RDS_CONG_MAP_BYTES));
152+
if (zp == 0)
153+
goto out;
154+
155+
for (i = 0; i < RDS_CONG_MAP_PAGES; i++)
156+
map->m_page_addrs[i] = zp + i * RDS_CONG_PAGE_SIZE;
156157

157158
spin_lock_irqsave(&rds_cong_lock, flags);
158159
ret = rds_cong_tree_walk(addr, map);
@@ -165,8 +166,9 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
165166

166167
out:
167168
if (map) {
168-
for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
169-
free_page(map->m_page_addrs[i]);
169+
if (zp)
170+
__free_pages(virt_to_page(map->m_page_addrs[0]),
171+
get_order(RDS_CONG_MAP_BYTES));
170172
kfree(map);
171173
}
172174

@@ -175,6 +177,26 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
175177
return ret;
176178
}
177179

180+
static struct rds_message *rds_cong_map_pages(unsigned long *page_addrs, unsigned int total_len)
181+
{
182+
struct rds_message *rm;
183+
int num_sgs = RDS_CONG_MAP_SGE;
184+
int extra_bytes = num_sgs * sizeof(struct scatterlist);
185+
186+
rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
187+
if (!rm)
188+
return ERR_PTR(-ENOMEM);
189+
190+
set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
191+
rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
192+
rm->data.op_nents = RDS_CONG_MAP_SGE;
193+
rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
194+
195+
sg_set_page(&rm->data.op_sg[0], virt_to_page(page_addrs[0]),
196+
total_len, 0);
197+
return rm;
198+
}
199+
178200
/*
179201
* Put the conn on its local map's list. This is called when the conn is
180202
* really added to the hash. It's nested under the rds_conn_lock, sadly.
@@ -377,14 +399,14 @@ void rds_cong_exit(void)
377399
{
378400
struct rb_node *node;
379401
struct rds_cong_map *map;
380-
unsigned long i;
381402

382403
while ((node = rb_first(&rds_cong_tree))) {
383404
map = rb_entry(node, struct rds_cong_map, m_rb_node);
384405
rdsdebug("freeing map %p\n", map);
385406
rb_erase(&map->m_rb_node, &rds_cong_tree);
386-
for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
387-
free_page(map->m_page_addrs[i]);
407+
if (map->m_page_addrs[0])
408+
__free_pages(virt_to_page(map->m_page_addrs[0]),
409+
get_order(RDS_CONG_MAP_BYTES));
388410
kfree(map);
389411
}
390412
}
@@ -397,7 +419,7 @@ struct rds_message *rds_cong_update_alloc(struct rds_connection *conn)
397419
struct rds_cong_map *map = conn->c_lcong;
398420
struct rds_message *rm;
399421

400-
rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
422+
rm = rds_cong_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
401423
if (!IS_ERR(rm))
402424
rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP;
403425

net/rds/ib_recv.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
990990
uint64_t *src, *dst;
991991
unsigned int k;
992992

993-
to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
993+
to_copy = min(RDS_FRAG_SIZE - frag_off, RDS_CONG_PAGE_SIZE - map_off);
994994
BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
995995

996996
addr = kmap_atomic(sg_page(&frag->f_sg));
@@ -1008,7 +1008,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
10081008
copied += to_copy;
10091009

10101010
map_off += to_copy;
1011-
if (map_off == PAGE_SIZE) {
1011+
if (map_off == RDS_CONG_PAGE_SIZE) {
10121012
map_off = 0;
10131013
map_page++;
10141014
}

net/rds/message.c

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -287,31 +287,6 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
287287
return sg_ret;
288288
}
289289

290-
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
291-
{
292-
struct rds_message *rm;
293-
unsigned int i;
294-
int num_sgs = ceil(total_len, PAGE_SIZE);
295-
int extra_bytes = num_sgs * sizeof(struct scatterlist);
296-
297-
rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
298-
if (!rm)
299-
return ERR_PTR(-ENOMEM);
300-
301-
set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
302-
rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
303-
rm->data.op_nents = ceil(total_len, PAGE_SIZE);
304-
rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
305-
306-
for (i = 0; i < rm->data.op_nents; ++i) {
307-
sg_set_page(&rm->data.op_sg[i],
308-
virt_to_page(page_addrs[i]),
309-
PAGE_SIZE, 0);
310-
}
311-
312-
return rm;
313-
}
314-
315290
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
316291
gfp_t gfp)
317292
{

net/rds/rds.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,11 @@ rdsdebug(char *fmt, ...)
5959
#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
6060

6161
#define RDS_CONG_MAP_BYTES (65536 / 8)
62+
#define RDS_CONG_PAGE_SIZE (1UL << 12)
6263
#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
63-
#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
64-
#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
64+
#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / RDS_CONG_PAGE_SIZE)
65+
#define RDS_CONG_MAP_PAGE_BITS (RDS_CONG_PAGE_SIZE * 8)
66+
#define RDS_CONG_MAP_SGE 1
6567

6668
struct rds_cong_map {
6769
struct rb_node m_rb_node;
@@ -768,7 +770,6 @@ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
768770
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
769771
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
770772
gfp_t gfp);
771-
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
772773
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
773774
__be16 dport, u64 seq);
774775
int rds_message_add_extension(struct rds_header *hdr,

0 commit comments

Comments
 (0)