Skip to content

Commit 0852d24

Browse files
jubinjohn authored and dledford committed
IB/hfi1: Change default number of user contexts
Change the default number of user contexts to the number of real (non-HT) cpu cores in order to reduce the division of hfi1 hardware contexts in the case of high core counts with hyper-threading enabled.

Reviewed-by: Dean Luick <[email protected]>
Reviewed-by: Dennis Dalessandro <[email protected]>
Reviewed-by: Mitko Haralanov <[email protected]>
Signed-off-by: Jubin John <[email protected]>
Signed-off-by: Doug Ledford <[email protected]>
1 parent: b218f78 · commit: 0852d24

File tree

3 files changed: 84 additions (+84) and 54 deletions (-54)

drivers/staging/rdma/hfi1/affinity.c

Lines changed: 47 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -53,20 +53,6 @@
5353
#include "sdma.h"
5454
#include "trace.h"
5555

56-
struct cpu_mask_set {
57-
struct cpumask mask;
58-
struct cpumask used;
59-
uint gen;
60-
};
61-
62-
struct hfi1_affinity {
63-
struct cpu_mask_set def_intr;
64-
struct cpu_mask_set rcv_intr;
65-
struct cpu_mask_set proc;
66-
/* spin lock to protect affinity struct */
67-
spinlock_t lock;
68-
};
69-
7056
/* Name of IRQ types, indexed by enum irq_type */
7157
static const char * const irq_type_names[] = {
7258
"SDMA",
@@ -82,6 +68,48 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
8268
set->gen = 0;
8369
}
8470

71+
/* Initialize non-HT cpu cores mask */
72+
int init_real_cpu_mask(struct hfi1_devdata *dd)
73+
{
74+
struct hfi1_affinity *info;
75+
int possible, curr_cpu, i, ht;
76+
77+
info = kzalloc(sizeof(*info), GFP_KERNEL);
78+
if (!info)
79+
return -ENOMEM;
80+
81+
cpumask_clear(&info->real_cpu_mask);
82+
83+
/* Start with cpu online mask as the real cpu mask */
84+
cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
85+
86+
/*
87+
* Remove HT cores from the real cpu mask. Do this in two steps below.
88+
*/
89+
possible = cpumask_weight(&info->real_cpu_mask);
90+
ht = cpumask_weight(topology_sibling_cpumask(
91+
cpumask_first(&info->real_cpu_mask)));
92+
/*
93+
* Step 1. Skip over the first N HT siblings and use them as the
94+
* "real" cores. Assumes that HT cores are not enumerated in
95+
* succession (except in the single core case).
96+
*/
97+
curr_cpu = cpumask_first(&info->real_cpu_mask);
98+
for (i = 0; i < possible / ht; i++)
99+
curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
100+
/*
101+
* Step 2. Remove the remaining HT siblings. Use cpumask_next() to
102+
* skip any gaps.
103+
*/
104+
for (; i < possible; i++) {
105+
cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
106+
curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
107+
}
108+
109+
dd->affinity = info;
110+
return 0;
111+
}
112+
85113
/*
86114
* Interrupt affinity.
87115
*
@@ -93,20 +121,17 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
93121
* to the node relative 1 as necessary.
94122
*
95123
*/
96-
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
124+
void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
97125
{
98126
int node = pcibus_to_node(dd->pcidev->bus);
99-
struct hfi1_affinity *info;
127+
struct hfi1_affinity *info = dd->affinity;
100128
const struct cpumask *local_mask;
101-
int curr_cpu, possible, i, ht;
129+
int curr_cpu, possible, i;
102130

103131
if (node < 0)
104132
node = numa_node_id();
105133
dd->node = node;
106134

107-
info = kzalloc(sizeof(*info), GFP_KERNEL);
108-
if (!info)
109-
return -ENOMEM;
110135
spin_lock_init(&info->lock);
111136

112137
init_cpu_mask_set(&info->def_intr);
@@ -116,30 +141,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
116141
local_mask = cpumask_of_node(dd->node);
117142
if (cpumask_first(local_mask) >= nr_cpu_ids)
118143
local_mask = topology_core_cpumask(0);
119-
/* use local mask as default */
120-
cpumask_copy(&info->def_intr.mask, local_mask);
121-
/*
122-
* Remove HT cores from the default mask. Do this in two steps below.
123-
*/
124-
possible = cpumask_weight(&info->def_intr.mask);
125-
ht = cpumask_weight(topology_sibling_cpumask(
126-
cpumask_first(&info->def_intr.mask)));
127-
/*
128-
* Step 1. Skip over the first N HT siblings and use them as the
129-
* "real" cores. Assumes that HT cores are not enumerated in
130-
* succession (except in the single core case).
131-
*/
132-
curr_cpu = cpumask_first(&info->def_intr.mask);
133-
for (i = 0; i < possible / ht; i++)
134-
curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
135-
/*
136-
* Step 2. Remove the remaining HT siblings. Use cpumask_next() to
137-
* skip any gaps.
138-
*/
139-
for (; i < possible; i++) {
140-
cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
141-
curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
142-
}
144+
/* Use the "real" cpu mask of this node as the default */
145+
cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
143146

144147
/* fill in the receive list */
145148
possible = cpumask_weight(&info->def_intr.mask);
@@ -167,8 +170,6 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
167170
}
168171

169172
cpumask_copy(&info->proc.mask, cpu_online_mask);
170-
dd->affinity = info;
171-
return 0;
172173
}
173174

174175
void hfi1_dev_affinity_free(struct hfi1_devdata *dd)

drivers/staging/rdma/hfi1/affinity.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,27 @@ enum affinity_flags {
6464
AFF_IRQ_LOCAL
6565
};
6666

67+
struct cpu_mask_set {
68+
struct cpumask mask;
69+
struct cpumask used;
70+
uint gen;
71+
};
72+
73+
struct hfi1_affinity {
74+
struct cpu_mask_set def_intr;
75+
struct cpu_mask_set rcv_intr;
76+
struct cpu_mask_set proc;
77+
struct cpumask real_cpu_mask;
78+
/* spin lock to protect affinity struct */
79+
spinlock_t lock;
80+
};
81+
6782
struct hfi1_msix_entry;
6883

84+
/* Initialize non-HT cpu cores mask */
85+
int init_real_cpu_mask(struct hfi1_devdata *);
6986
/* Initialize driver affinity data */
70-
int hfi1_dev_affinity_init(struct hfi1_devdata *);
87+
void hfi1_dev_affinity_init(struct hfi1_devdata *);
7188
/* Free driver affinity data */
7289
void hfi1_dev_affinity_free(struct hfi1_devdata *);
7390
/*

drivers/staging/rdma/hfi1/chip.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12656,12 +12656,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
1265612656
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
1265712657
}
1265812658
/*
12659-
* User contexts: (to be fixed later)
12660-
* - default to 1 user context per CPU if num_user_contexts is
12661-
* negative
12659+
* User contexts:
12660+
* - default to 1 user context per real (non-HT) CPU core if
12661+
* num_user_contexts is negative
1266212662
*/
1266312663
if (num_user_contexts < 0)
12664-
num_user_contexts = num_online_cpus();
12664+
num_user_contexts =
12665+
cpumask_weight(&dd->affinity->real_cpu_mask);
1266512666

1266612667
total_contexts = num_kernel_contexts + num_user_contexts;
1266712668

@@ -14089,6 +14090,19 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
1408914090
(dd->revision >> CCE_REVISION_SW_SHIFT)
1409014091
& CCE_REVISION_SW_MASK);
1409114092

14093+
/*
14094+
* The real cpu mask is part of the affinity struct but has to be
14095+
* initialized earlier than the rest of the affinity struct because it
14096+
* is needed to calculate the number of user contexts in
14097+
* set_up_context_variables(). However, hfi1_dev_affinity_init(),
14098+
* which initializes the rest of the affinity struct members,
14099+
* depends on set_up_context_variables() for the number of kernel
14100+
* contexts, so it cannot be called before set_up_context_variables().
14101+
*/
14102+
ret = init_real_cpu_mask(dd);
14103+
if (ret)
14104+
goto bail_cleanup;
14105+
1409214106
ret = set_up_context_variables(dd);
1409314107
if (ret)
1409414108
goto bail_cleanup;
@@ -14102,9 +14116,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
1410214116
/* set up KDETH QP prefix in both RX and TX CSRs */
1410314117
init_kdeth_qp(dd);
1410414118

14105-
ret = hfi1_dev_affinity_init(dd);
14106-
if (ret)
14107-
goto bail_cleanup;
14119+
hfi1_dev_affinity_init(dd);
1410814120

1410914121
/* send contexts must be set up before receive contexts */
1411014122
ret = init_send_contexts(dd);

0 commit comments

Comments
 (0)