Skip to content

Commit 44306f1

Browse files
j-xiong, dledford
authored and committed
IB/hfi1: Reduce kernel context pio buffer allocation
The pio buffers were pooled evenly among all kernel contexts and user contexts. However, the demand from kernel contexts is much lower than that from user contexts. This patch reduces the allocation for kernel contexts and thus makes more credits available for PSM, helping performance. This is especially useful on high core-count systems where large numbers of contexts are used. A new context type SC_VL15 is added to distinguish the context used for VL15 from other kernel contexts. The reason is that VL15 needs to support 2KB-sized packets while other kernel contexts need only support packets up to the size determined by "piothreshold", which has a default value of 256. The new allocation method allows triple buffering of the largest pio packets configured for these contexts. This is sufficient to maintain verbs performance. The largest pio packet size is 2048B for VL15 and "piothreshold" for other kernel contexts. A cap is applied to "piothreshold" to avoid excessive buffer allocation. The special case where SDMA is disabled is handled differently. In that case, the original pooling allocation is used to better support the much higher pio traffic. Notice that if adaptive pio is disabled (piothreshold==0), the pio buffer size doesn't matter for non-VL15 kernel send contexts when SDMA is enabled, because pio is not used at all on these contexts and thus the new allocation is still valid. If SDMA is disabled, then pooling allocation is used as mentioned in the previous paragraph. An adjustment is also made to the calculation of the credit return threshold for the kernel contexts. Instead of being based purely on the MTU size, a percentage-based threshold is also considered and the smaller of the two is chosen. This is necessary to ensure that, with the reduced buffer allocation, credits are returned in time to avoid unnecessary stalls in the send path. 
Reviewed-by: Mike Marciniszyn <[email protected]> Reviewed-by: Dean Luick <[email protected]> Reviewed-by: Dennis Dalessandro <[email protected]> Reviewed-by: Mark Debbage <[email protected]> Reviewed-by: Jubin John <[email protected]> Signed-off-by: Jianxin Xiong <[email protected]> Signed-off-by: Doug Ledford <[email protected]>
1 parent 0852d24 commit 44306f1

File tree

4 files changed

+61
-25
lines changed

4 files changed

+61
-25
lines changed

drivers/staging/rdma/hfi1/chip.c

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5661,7 +5661,7 @@ static int sc_to_vl(struct hfi1_devdata *dd, int sw_index)
56615661
sci = &dd->send_contexts[sw_index];
56625662

56635663
/* there is no information for user (PSM) and ack contexts */
5664-
if (sci->type != SC_KERNEL)
5664+
if ((sci->type != SC_KERNEL) && (sci->type != SC_VL15))
56655665
return -1;
56665666

56675667
sc = sci->sc;
@@ -9627,6 +9627,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
96279627
& SEND_LEN_CHECK1_LEN_VL15_MASK) <<
96289628
SEND_LEN_CHECK1_LEN_VL15_SHIFT;
96299629
int i;
9630+
u32 thres;
96309631

96319632
for (i = 0; i < ppd->vls_supported; i++) {
96329633
if (dd->vld[i].mtu > maxvlmtu)
@@ -9645,16 +9646,17 @@ static void set_send_length(struct hfi1_pportdata *ppd)
96459646
/* adjust kernel credit return thresholds based on new MTUs */
96469647
/* all kernel receive contexts have the same hdrqentsize */
96479648
for (i = 0; i < ppd->vls_supported; i++) {
9648-
sc_set_cr_threshold(dd->vld[i].sc,
9649-
sc_mtu_to_threshold(dd->vld[i].sc,
9650-
dd->vld[i].mtu,
9651-
dd->rcd[0]->
9652-
rcvhdrqentsize));
9653-
}
9654-
sc_set_cr_threshold(dd->vld[15].sc,
9655-
sc_mtu_to_threshold(dd->vld[15].sc,
9656-
dd->vld[15].mtu,
9649+
thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
9650+
sc_mtu_to_threshold(dd->vld[i].sc,
9651+
dd->vld[i].mtu,
96579652
dd->rcd[0]->rcvhdrqentsize));
9653+
sc_set_cr_threshold(dd->vld[i].sc, thres);
9654+
}
9655+
thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50),
9656+
sc_mtu_to_threshold(dd->vld[15].sc,
9657+
dd->vld[15].mtu,
9658+
dd->rcd[0]->rcvhdrqentsize));
9659+
sc_set_cr_threshold(dd->vld[15].sc, thres);
96589660

96599661
/* Adjust maximum MTU for the port in DC */
96609662
dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
@@ -12728,12 +12730,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
1272812730
dd->num_send_contexts = ret;
1272912731
dd_dev_info(
1273012732
dd,
12731-
"send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
12733+
"send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n",
1273212734
dd->chip_send_contexts,
1273312735
dd->num_send_contexts,
1273412736
dd->sc_sizes[SC_KERNEL].count,
1273512737
dd->sc_sizes[SC_ACK].count,
12736-
dd->sc_sizes[SC_USER].count);
12738+
dd->sc_sizes[SC_USER].count,
12739+
dd->sc_sizes[SC_VL15].count);
1273712740
ret = 0; /* success */
1273812741
}
1273912742

drivers/staging/rdma/hfi1/diag.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,8 @@ static ssize_t diagpkt_send(struct diag_pkt *dp)
413413
goto bail;
414414
}
415415
/* can only use kernel contexts */
416-
if (dd->send_contexts[dp->sw_index].type != SC_KERNEL) {
416+
if (dd->send_contexts[dp->sw_index].type != SC_KERNEL &&
417+
dd->send_contexts[dp->sw_index].type != SC_VL15) {
417418
ret = -EINVAL;
418419
goto bail;
419420
}

drivers/staging/rdma/hfi1/pio.c

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -139,23 +139,30 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
139139
/* Send Context Size (SCS) wildcards */
140140
#define SCS_POOL_0 -1
141141
#define SCS_POOL_1 -2
142+
142143
/* Send Context Count (SCC) wildcards */
143144
#define SCC_PER_VL -1
144145
#define SCC_PER_CPU -2
145-
146146
#define SCC_PER_KRCVQ -3
147-
#define SCC_ACK_CREDITS 32
147+
148+
/* Send Context Size (SCS) constants */
149+
#define SCS_ACK_CREDITS 32
150+
#define SCS_VL15_CREDITS 102 /* 3 pkts of 2048B data + 128B header */
151+
152+
#define PIO_THRESHOLD_CEILING 4096
148153

149154
#define PIO_WAIT_BATCH_SIZE 5
150155

151156
/* default send context sizes */
152157
static struct sc_config_sizes sc_config_sizes[SC_MAX] = {
153158
[SC_KERNEL] = { .size = SCS_POOL_0, /* even divide, pool 0 */
154-
.count = SCC_PER_VL },/* one per NUMA */
155-
[SC_ACK] = { .size = SCC_ACK_CREDITS,
159+
.count = SCC_PER_VL }, /* one per NUMA */
160+
[SC_ACK] = { .size = SCS_ACK_CREDITS,
156161
.count = SCC_PER_KRCVQ },
157162
[SC_USER] = { .size = SCS_POOL_0, /* even divide, pool 0 */
158163
.count = SCC_PER_CPU }, /* one per CPU */
164+
[SC_VL15] = { .size = SCS_VL15_CREDITS,
165+
.count = 1 },
159166

160167
};
161168

@@ -202,7 +209,8 @@ static int wildcard_to_pool(int wc)
202209
static const char *sc_type_names[SC_MAX] = {
203210
"kernel",
204211
"ack",
205-
"user"
212+
"user",
213+
"vl15"
206214
};
207215

208216
static const char *sc_type_name(int index)
@@ -230,6 +238,22 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
230238
int extra;
231239
int i;
232240

241+
/*
242+
* When SDMA is enabled, kernel context pio packet size is capped by
243+
* "piothreshold". Reduce pio buffer allocation for kernel context by
244+
* setting it to a fixed size. The allocation allows 3-deep buffering
245+
* of the largest pio packets plus up to 128 bytes header, sufficient
246+
* to maintain verbs performance.
247+
*
248+
* When SDMA is disabled, keep the default pooling allocation.
249+
*/
250+
if (HFI1_CAP_IS_KSET(SDMA)) {
251+
u16 max_pkt_size = (piothreshold < PIO_THRESHOLD_CEILING) ?
252+
piothreshold : PIO_THRESHOLD_CEILING;
253+
sc_config_sizes[SC_KERNEL].size =
254+
3 * (max_pkt_size + 128) / PIO_BLOCK_SIZE;
255+
}
256+
233257
/*
234258
* Step 0:
235259
* - copy the centipercents/absolute sizes from the pool config
@@ -311,7 +335,7 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
311335
if (i == SC_ACK) {
312336
count = dd->n_krcv_queues;
313337
} else if (i == SC_KERNEL) {
314-
count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */;
338+
count = INIT_SC_PER_VL * num_vls;
315339
} else if (count == SCC_PER_CPU) {
316340
count = dd->num_rcv_contexts - dd->n_krcv_queues;
317341
} else if (count < 0) {
@@ -596,7 +620,7 @@ u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize)
596620
* Return value is what to write into the CSR: trigger return when
597621
* unreturned credits pass this count.
598622
*/
599-
static u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
623+
u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
600624
{
601625
return (sc->credits * percent) / 100;
602626
}
@@ -790,15 +814,20 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
790814
* For Ack contexts, set a threshold for half the credits.
791815
* For User contexts use the given percentage. This has been
792816
* sanitized on driver start-up.
793-
* For Kernel contexts, use the default MTU plus a header.
817+
* For Kernel contexts, use the default MTU plus a header
818+
* or half the credits, whichever is smaller. This should
819+
* work for both the 3-deep buffering allocation and the
820+
* pooling allocation.
794821
*/
795822
if (type == SC_ACK) {
796823
thresh = sc_percent_to_threshold(sc, 50);
797824
} else if (type == SC_USER) {
798825
thresh = sc_percent_to_threshold(sc,
799826
user_credit_return_threshold);
800827
} else { /* kernel */
801-
thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
828+
thresh = min(sc_percent_to_threshold(sc, 50),
829+
sc_mtu_to_threshold(sc, hfi1_max_mtu,
830+
hdrqentsize));
802831
}
803832
reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
804833
/* add in early return */
@@ -1531,7 +1560,8 @@ static void sc_piobufavail(struct send_context *sc)
15311560
unsigned long flags;
15321561
unsigned i, n = 0;
15331562

1534-
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL)
1563+
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
1564+
dd->send_contexts[sc->sw_index].type != SC_VL15)
15351565
return;
15361566
list = &sc->piowait;
15371567
/*
@@ -1900,7 +1930,7 @@ int init_pervl_scs(struct hfi1_devdata *dd)
19001930
u32 ctxt;
19011931
struct hfi1_pportdata *ppd = dd->pport;
19021932

1903-
dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
1933+
dd->vld[15].sc = sc_alloc(dd, SC_VL15,
19041934
dd->rcd[0]->rcvhdrqentsize, dd->node);
19051935
if (!dd->vld[15].sc)
19061936
goto nomem;

drivers/staging/rdma/hfi1/pio.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@
5151
#define SC_KERNEL 0
5252
#define SC_ACK 1
5353
#define SC_USER 2
54-
#define SC_MAX 3
54+
#define SC_VL15 3
55+
#define SC_MAX 4
5556

5657
/* invalid send context index */
5758
#define INVALID_SCI 0xff
@@ -293,6 +294,7 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
293294
void sc_add_credit_return_intr(struct send_context *sc);
294295
void sc_del_credit_return_intr(struct send_context *sc);
295296
void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold);
297+
u32 sc_percent_to_threshold(struct send_context *sc, u32 percent);
296298
u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize);
297299
void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint);
298300
void sc_wait(struct hfi1_devdata *dd);

0 commit comments

Comments
 (0)