Skip to content

Commit 3898f52

Browse files
committed
Merge branch 'net-smc-virt-contig-buffers'
Wen Gu says: ==================== net/smc: Introduce virtually contiguous buffers for SMC-R On long-running enterprise production servers, high-order contiguous memory pages are usually very rare, and in most cases we can only get fragmented pages. When replacing TCP with SMC-R in such production scenarios, attempting to allocate high-order physically contiguous sndbufs and RMBs may result in frequent memory compaction, which can cause unexpected hangs and further stability risks. So this patch set aims to allow SMC-R link groups to use virtually contiguous sndbufs and RMBs to avoid the potential issues mentioned above. Whether to use physically or virtually contiguous buffers can be set by the sysctl smcr_buf_type. Note that using virtually contiguous buffers will bring an acceptable performance regression, which can be mainly divided into two parts: 1) regression in the data path, which is brought by the additional address translation of the sndbuf by the RNIC in Tx. But in general, translating addresses through the MTT is fast. According to the qperf test, this part of the regression is basically less than 10% in latency and bandwidth. (see patch 5/6 for details) 2) regression in the buffer initialization and destruction path, which is brought by additional MR operations on sndbufs. But thanks to the link group buffer reuse mechanism, the impact of this kind of regression decreases as the number of buffer reuses increases. 
Patch set overview: - Patch 1/6 and 2/6 are mainly about simplifying and optimizing the DMA sync operation, which will reduce overhead on the data path, especially when using virtually contiguous buffers; - Patch 3/6 and 4/6 introduce a sysctl smcr_buf_type to set the type of buffers in newly created link groups; - Patch 5/6 allows SMC-R to use virtually contiguous sndbufs and RMBs, including buffer creation, destruction, MR operation and access; - Patch 6/6 extends the netlink attribute for the buffer type of an SMC-R link group; v1->v2: - Patch 5/6 fixes a build issue on 32bit; - Patch 3/6 adds a description of the new sysctl in smc-sysctl.rst; ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 2acd102 + ddefb2d commit 3898f52

File tree

14 files changed

+404
-147
lines changed

14 files changed

+404
-147
lines changed

Documentation/networking/smc-sysctl.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,16 @@ autocorking_size - INTEGER
2121
know how/when to uncork their sockets.
2222

2323
Default: 64K
24+
25+
smcr_buf_type - INTEGER
26+
Controls which type of sndbufs and RMBs to use in newly created
27+
SMC-R link groups. Only for SMC-R.
28+
29+
Default: 0 (physically contiguous sndbufs and RMBs)
30+
31+
Possible values:
32+
33+
- 0 - Use physically contiguous buffers
34+
- 1 - Use virtually contiguous buffers
35+
- 2 - Mixed use of the two types. Try physically contiguous buffers first.
36+
If not available, fall back to virtually contiguous buffers.

include/net/netns/smc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,6 @@ struct netns_smc {
1818
struct ctl_table_header *smc_hdr;
1919
#endif
2020
unsigned int sysctl_autocorking_size;
21+
unsigned int sysctl_smcr_buf_type;
2122
};
2223
#endif

include/uapi/linux/smc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ enum {
124124
SMC_NLA_LGR_R_V2, /* nest */
125125
SMC_NLA_LGR_R_NET_COOKIE, /* u64 */
126126
SMC_NLA_LGR_R_PAD, /* flag */
127+
SMC_NLA_LGR_R_BUF_TYPE, /* u8 */
127128
__SMC_NLA_LGR_R_MAX,
128129
SMC_NLA_LGR_R_MAX = __SMC_NLA_LGR_R_MAX - 1
129130
};

net/smc/af_smc.c

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,29 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
487487
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
488488
}
489489

490+
/* register the new vzalloced sndbuf on all links */
491+
static int smcr_lgr_reg_sndbufs(struct smc_link *link,
492+
struct smc_buf_desc *snd_desc)
493+
{
494+
struct smc_link_group *lgr = link->lgr;
495+
int i, rc = 0;
496+
497+
if (!snd_desc->is_vm)
498+
return -EINVAL;
499+
500+
/* protect against parallel smcr_link_reg_buf() */
501+
mutex_lock(&lgr->llc_conf_mutex);
502+
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
503+
if (!smc_link_active(&lgr->lnk[i]))
504+
continue;
505+
rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
506+
if (rc)
507+
break;
508+
}
509+
mutex_unlock(&lgr->llc_conf_mutex);
510+
return rc;
511+
}
512+
490513
/* register the new rmb on all links */
491514
static int smcr_lgr_reg_rmbs(struct smc_link *link,
492515
struct smc_buf_desc *rmb_desc)
@@ -498,13 +521,13 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
498521
if (rc)
499522
return rc;
500523
/* protect against parallel smc_llc_cli_rkey_exchange() and
501-
* parallel smcr_link_reg_rmb()
524+
* parallel smcr_link_reg_buf()
502525
*/
503526
mutex_lock(&lgr->llc_conf_mutex);
504527
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
505528
if (!smc_link_active(&lgr->lnk[i]))
506529
continue;
507-
rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
530+
rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
508531
if (rc)
509532
goto out;
510533
}
@@ -550,8 +573,15 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
550573

551574
smc_wr_remember_qp_attr(link);
552575

553-
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
554-
return SMC_CLC_DECL_ERR_REGRMB;
576+
/* reg the sndbuf if it was vzalloced */
577+
if (smc->conn.sndbuf_desc->is_vm) {
578+
if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
579+
return SMC_CLC_DECL_ERR_REGBUF;
580+
}
581+
582+
/* reg the rmb */
583+
if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
584+
return SMC_CLC_DECL_ERR_REGBUF;
555585

556586
/* confirm_rkey is implicit on 1st contact */
557587
smc->conn.rmb_desc->is_conf_rkey = true;
@@ -1221,12 +1251,18 @@ static int smc_connect_rdma(struct smc_sock *smc,
12211251
goto connect_abort;
12221252
}
12231253
} else {
1254+
/* reg sendbufs if they were vzalloced */
1255+
if (smc->conn.sndbuf_desc->is_vm) {
1256+
if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
1257+
reason_code = SMC_CLC_DECL_ERR_REGBUF;
1258+
goto connect_abort;
1259+
}
1260+
}
12241261
if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
1225-
reason_code = SMC_CLC_DECL_ERR_REGRMB;
1262+
reason_code = SMC_CLC_DECL_ERR_REGBUF;
12261263
goto connect_abort;
12271264
}
12281265
}
1229-
smc_rmb_sync_sg_for_device(&smc->conn);
12301266

12311267
if (aclc->hdr.version > SMC_V1) {
12321268
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
@@ -1750,8 +1786,15 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
17501786
struct smc_llc_qentry *qentry;
17511787
int rc;
17521788

1753-
if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
1754-
return SMC_CLC_DECL_ERR_REGRMB;
1789+
/* reg the sndbuf if it was vzalloced*/
1790+
if (smc->conn.sndbuf_desc->is_vm) {
1791+
if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
1792+
return SMC_CLC_DECL_ERR_REGBUF;
1793+
}
1794+
1795+
/* reg the rmb */
1796+
if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
1797+
return SMC_CLC_DECL_ERR_REGBUF;
17551798

17561799
/* send CONFIRM LINK request to client over the RoCE fabric */
17571800
rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
@@ -2110,10 +2153,15 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
21102153
struct smc_connection *conn = &new_smc->conn;
21112154

21122155
if (!local_first) {
2156+
/* reg sendbufs if they were vzalloced */
2157+
if (conn->sndbuf_desc->is_vm) {
2158+
if (smcr_lgr_reg_sndbufs(conn->lnk,
2159+
conn->sndbuf_desc))
2160+
return SMC_CLC_DECL_ERR_REGBUF;
2161+
}
21132162
if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
2114-
return SMC_CLC_DECL_ERR_REGRMB;
2163+
return SMC_CLC_DECL_ERR_REGBUF;
21152164
}
2116-
smc_rmb_sync_sg_for_device(&new_smc->conn);
21172165

21182166
return 0;
21192167
}

net/smc/smc_clc.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,7 +1034,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
10341034
ETH_ALEN);
10351035
hton24(clc->r0.qpn, link->roce_qp->qp_num);
10361036
clc->r0.rmb_rkey =
1037-
htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
1037+
htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
10381038
clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
10391039
clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
10401040
switch (clc->hdr.type) {
@@ -1046,8 +1046,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
10461046
break;
10471047
}
10481048
clc->r0.rmbe_size = conn->rmbe_size_short;
1049-
clc->r0.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
1050-
(conn->rmb_desc->sgt[link->link_idx].sgl));
1049+
clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
1050+
cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
1051+
cpu_to_be64((u64)sg_dma_address
1052+
(conn->rmb_desc->sgt[link->link_idx].sgl));
10511053
hton24(clc->r0.psn, link->psn_initial);
10521054
if (version == SMC_V1) {
10531055
clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);

net/smc/smc_clc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
6363
#define SMC_CLC_DECL_ERR_RTOK 0x09990001 /* rtoken handling failed */
6464
#define SMC_CLC_DECL_ERR_RDYLNK 0x09990002 /* ib ready link failed */
65-
#define SMC_CLC_DECL_ERR_REGRMB 0x09990003 /* reg rmb failed */
65+
#define SMC_CLC_DECL_ERR_REGBUF 0x09990003 /* reg rdma bufs failed */
6666

6767
#define SMC_FIRST_CONTACT_MASK 0b10 /* first contact bit within typev2 */
6868

0 commit comments

Comments
 (0)