Skip to content

Commit 80367ad

Browse files
Sebastian Andrzej Siewior authored and Peter Zijlstra committed
futex: Add basic infrastructure for local task local hash
The futex hash is system wide and shared by all tasks. Each slot is hashed based on futex address and the VMA of the thread. Due to randomized VMAs (and memory allocations) the same logical lock (pointer) can end up in a different hash bucket on each invocation of the application. This in turn means that different applications may share a hash bucket on the first invocation but not on the second and it is not always clear which applications will be involved. This can result in high latencies to acquire the futex_hash_bucket::lock especially if the lock owner is limited to a CPU and cannot be effectively PI boosted. Introduce basic infrastructure for a process local hash which is shared by all threads of a process. This hash will only be used for a PROCESS_PRIVATE FUTEX operation. The hashmap can be allocated via: prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, num); A `num' of 0 means that the global hash is used instead of a private hash. Other values for `num' specify the number of slots for the hash and the number must be a power of two, starting with two. The prctl() returns zero on success. This function can only be used before a thread is created. The current status for the private hash can be queried via: num = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); which returns the current number of slots. The value 0 means that the global hash is used. Values greater than 0 indicate the number of slots that are used. A negative number indicates an error. For optimisation, for the private hash jhash2() uses only two arguments: the address and the offset. This omits the VMA which is always the same. [peterz: Use 0 for global hash. A bit shuffling and renaming. ] Signed-off-by: Sebastian Andrzej Siewior <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 9a9bdfd commit 80367ad

File tree

8 files changed

+244
-21
lines changed

8 files changed

+244
-21
lines changed

include/linux/futex.h

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44

55
#include <linux/sched.h>
66
#include <linux/ktime.h>
7+
#include <linux/mm_types.h>
78

89
#include <uapi/linux/futex.h>
910

1011
struct inode;
11-
struct mm_struct;
1212
struct task_struct;
1313

1414
/*
@@ -77,7 +77,22 @@ void futex_exec_release(struct task_struct *tsk);
7777

7878
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
7979
u32 __user *uaddr2, u32 val2, u32 val3);
80-
#else
80+
int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
81+
82+
#ifdef CONFIG_FUTEX_PRIVATE_HASH
83+
void futex_hash_free(struct mm_struct *mm);
84+
85+
/*
 * Start a freshly initialised mm without a private futex hash. A hash can
 * be installed afterwards by storing it in mm->futex_phash.
 */
static inline void futex_mm_init(struct mm_struct *mm)
86+
{
87+
mm->futex_phash = NULL;
88+
}
89+
90+
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
91+
/* Private hash support is compiled out: there is nothing to free. */
static inline void futex_hash_free(struct mm_struct *mm) { }
92+
/* Private hash support is compiled out: there is no per-mm hash state to set up. */
static inline void futex_mm_init(struct mm_struct *mm) { }
93+
#endif /* CONFIG_FUTEX_PRIVATE_HASH */
94+
95+
#else /* !CONFIG_FUTEX */
8196
static inline void futex_init_task(struct task_struct *tsk) { }
8297
static inline void futex_exit_recursive(struct task_struct *tsk) { }
8398
static inline void futex_exit_release(struct task_struct *tsk) { }
@@ -88,6 +103,13 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
88103
{
89104
return -EINVAL;
90105
}
106+
/* Futex support is compiled out: every PR_FUTEX_HASH request fails with -EINVAL. */
static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
107+
{
108+
return -EINVAL;
109+
}
110+
/* Futex support is compiled out: no private hash exists, nothing to free. */
static inline void futex_hash_free(struct mm_struct *mm) { }
111+
/* Futex support is compiled out: no per-mm futex state to initialise. */
static inline void futex_mm_init(struct mm_struct *mm) { }
112+
91113
#endif
92114

93115
#endif

include/linux/mm_types.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#define INIT_PASID 0
3232

3333
struct address_space;
34+
struct futex_private_hash;
3435
struct mem_cgroup;
3536

3637
/*
@@ -1031,7 +1032,9 @@ struct mm_struct {
10311032
*/
10321033
seqcount_t mm_lock_seq;
10331034
#endif
1034-
1035+
#ifdef CONFIG_FUTEX_PRIVATE_HASH
1036+
struct futex_private_hash *futex_phash;
1037+
#endif
10351038

10361039
unsigned long hiwater_rss; /* High-watermark of RSS usage */
10371040
unsigned long hiwater_vm; /* High-water virtual memory usage */

include/uapi/linux/prctl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,4 +364,9 @@ struct prctl_mm_map {
364364
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
365365
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
366366

367+
/* FUTEX hash management */
368+
#define PR_FUTEX_HASH 78
369+
# define PR_FUTEX_HASH_SET_SLOTS 1
370+
# define PR_FUTEX_HASH_GET_SLOTS 2
371+
367372
#endif /* _LINUX_PRCTL_H */

init/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1699,6 +1699,11 @@ config FUTEX_PI
16991699
depends on FUTEX && RT_MUTEXES
17001700
default y
17011701

1702+
config FUTEX_PRIVATE_HASH
1703+
bool
1704+
depends on FUTEX && !BASE_SMALL && MMU
1705+
default y
1706+
17021707
config EPOLL
17031708
bool "Enable eventpoll support" if EXPERT
17041709
default y

kernel/fork.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,6 +1305,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
13051305
RCU_INIT_POINTER(mm->exe_file, NULL);
13061306
mmu_notifier_subscriptions_init(mm);
13071307
init_tlb_flush_pending(mm);
1308+
futex_mm_init(mm);
13081309
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS)
13091310
mm->pmd_huge_pte = NULL;
13101311
#endif
@@ -1387,6 +1388,7 @@ static inline void __mmput(struct mm_struct *mm)
13871388
if (mm->binfmt)
13881389
module_put(mm->binfmt->module);
13891390
lru_gen_del_mm(mm);
1391+
futex_hash_free(mm);
13901392
mmdrop(mm);
13911393
}
13921394

kernel/futex/core.c

Lines changed: 190 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include <linux/memblock.h>
4040
#include <linux/fault-inject.h>
4141
#include <linux/slab.h>
42+
#include <linux/prctl.h>
4243

4344
#include "futex.h"
4445
#include "../locking/rtmutex_common.h"
@@ -55,6 +56,12 @@ static struct {
5556
#define futex_queues (__futex_data.queues)
5657
#define futex_hashmask (__futex_data.hashmask)
5758

59+
/*
 * Process-private futex hash, referenced from mm_struct::futex_phash.
 *
 * hash_mask:	number of slots minus one; 0 when the hash was allocated with
 *		zero slots, in which case lookups fall back to the global hash.
 * mm:		the mm this hash was allocated for (stored as an untyped pointer).
 * custom:	the 'custom' argument given to futex_hash_allocate(); the
 *		prctl() path passes true.
 * queues:	the hash buckets, allocated together with the header.
 */
struct futex_private_hash {
60+
unsigned int hash_mask;
61+
void *mm;
62+
bool custom;
63+
struct futex_hash_bucket queues[];
64+
};
5865

5966
/*
6067
* Fault injections for futexes.
@@ -107,31 +114,26 @@ late_initcall(fail_futex_debugfs);
107114

108115
#endif /* CONFIG_FAIL_FUTEX */
109116

110-
struct futex_private_hash *futex_private_hash(void)
117+
static struct futex_hash_bucket *
118+
__futex_hash(union futex_key *key, struct futex_private_hash *fph);
119+
120+
#ifdef CONFIG_FUTEX_PRIVATE_HASH
121+
static inline bool futex_key_is_private(union futex_key *key)
111122
{
112-
return NULL;
123+
/*
124+
* Relies on get_futex_key() to set either bit for shared
125+
* futexes -- see comment with union futex_key.
126+
*/
127+
return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED));
113128
}
114129

115130
bool futex_private_hash_get(struct futex_private_hash *fph)
116131
{
117132
return false;
118133
}
119134

120-
void futex_private_hash_put(struct futex_private_hash *fph) { }
121-
122-
/**
123-
* futex_hash - Return the hash bucket in the global hash
124-
* @key: Pointer to the futex key for which the hash is calculated
125-
*
126-
* We hash on the keys returned from get_futex_key (see below) and return the
127-
* corresponding hash bucket in the global hash.
128-
*/
129-
struct futex_hash_bucket *futex_hash(union futex_key *key)
135+
void futex_private_hash_put(struct futex_private_hash *fph)
130136
{
131-
u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
132-
key->both.offset);
133-
134-
return &futex_queues[hash & futex_hashmask];
135137
}
136138

137139
/**
@@ -144,6 +146,84 @@ struct futex_hash_bucket *futex_hash(union futex_key *key)
144146
void futex_hash_get(struct futex_hash_bucket *hb) { }
145147
void futex_hash_put(struct futex_hash_bucket *hb) { }
146148

149+
static struct futex_hash_bucket *
150+
__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
151+
{
152+
u32 hash;
153+
154+
if (!futex_key_is_private(key))
155+
return NULL;
156+
157+
if (!fph)
158+
fph = key->private.mm->futex_phash;
159+
if (!fph || !fph->hash_mask)
160+
return NULL;
161+
162+
hash = jhash2((void *)&key->private.address,
163+
sizeof(key->private.address) / 4,
164+
key->both.offset);
165+
return &fph->queues[hash & fph->hash_mask];
166+
}
167+
168+
struct futex_private_hash *futex_private_hash(void)
169+
{
170+
struct mm_struct *mm = current->mm;
171+
struct futex_private_hash *fph;
172+
173+
fph = mm->futex_phash;
174+
return fph;
175+
}
176+
177+
struct futex_hash_bucket *futex_hash(union futex_key *key)
178+
{
179+
struct futex_hash_bucket *hb;
180+
181+
hb = __futex_hash(key, NULL);
182+
return hb;
183+
}
184+
185+
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
186+
187+
/*
 * Private hash support is compiled out: there is never a private bucket,
 * so __futex_hash() always falls through to the global hash.
 */
static struct futex_hash_bucket *
188+
__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
189+
{
190+
return NULL;
191+
}
192+
193+
/* Return the hash bucket for @key; no private hash is supplied. */
struct futex_hash_bucket *futex_hash(union futex_key *key)
194+
{
195+
return __futex_hash(key, NULL);
196+
}
197+
198+
#endif /* CONFIG_FUTEX_PRIVATE_HASH */
199+
200+
/**
201+
* __futex_hash - Return the hash bucket
202+
* @key: Pointer to the futex key for which the hash is calculated
203+
* @fph: Pointer to private hash if known
204+
*
205+
* We hash on the keys returned from get_futex_key (see below) and return the
206+
* corresponding hash bucket.
207+
* If the FUTEX is PROCESS_PRIVATE then a per-process hash bucket (from the
208+
* private hash) is returned if existing. Otherwise a hash bucket from the
209+
* global hash is returned.
210+
*/
211+
static struct futex_hash_bucket *
212+
__futex_hash(union futex_key *key, struct futex_private_hash *fph)
213+
{
214+
struct futex_hash_bucket *hb;
215+
u32 hash;
216+
217+
hb = __futex_hash_private(key, fph);
218+
if (hb)
219+
return hb;
220+
221+
hash = jhash2((u32 *)key,
222+
offsetof(typeof(*key), both.offset) / 4,
223+
key->both.offset);
224+
return &futex_queues[hash & futex_hashmask];
225+
}
226+
147227
/**
148228
* futex_setup_timer - set up the sleeping hrtimer.
149229
* @time: ptr to the given timeout value
@@ -985,6 +1065,13 @@ static void exit_pi_state_list(struct task_struct *curr)
9851065
struct futex_pi_state *pi_state;
9861066
union futex_key key = FUTEX_KEY_INIT;
9871067

1068+
/*
1069+
* Ensure the hash remains stable (no resize) during the while loop
1070+
* below. The hb pointer is acquired under the pi_lock so we can't block
1071+
* on the mutex.
1072+
*/
1073+
WARN_ON(curr != current);
1074+
guard(private_hash)();
9881075
/*
9891076
* We are a ZOMBIE and nobody can enqueue itself on
9901077
* pi_state_list anymore, but we have to be careful
@@ -1160,13 +1247,98 @@ void futex_exit_release(struct task_struct *tsk)
11601247
futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
11611248
}
11621249

1163-
static void futex_hash_bucket_init(struct futex_hash_bucket *fhb)
1250+
static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
1251+
struct futex_private_hash *fph)
11641252
{
1253+
#ifdef CONFIG_FUTEX_PRIVATE_HASH
1254+
fhb->priv = fph;
1255+
#endif
11651256
atomic_set(&fhb->waiters, 0);
11661257
plist_head_init(&fhb->chain);
11671258
spin_lock_init(&fhb->lock);
11681259
}
11691260

1261+
#ifdef CONFIG_FUTEX_PRIVATE_HASH
1262+
/*
 * Release the private futex hash attached to @mm.
 * Invoked from __mmput() just before the final mmdrop().
 */
void futex_hash_free(struct mm_struct *mm)
1263+
{
1264+
kvfree(mm->futex_phash);
1265+
}
1266+
1267+
/*
 * Allocate a process-private futex hash and attach it to current->mm.
 *
 * @hash_slots: number of buckets; must be 0 or a power of two >= 2.
 *              With 0 only the header is allocated and hash_mask stays 0,
 *              which makes lookups fall back to the global hash.
 * @custom:     recorded verbatim in the new hash.
 *
 * Returns 0 on success, or
 *   -EINVAL   if @hash_slots is 1 or not a power of two, or if the calling
 *             task is not the only thread in its thread group,
 *   -EALREADY if the mm already has a private hash attached,
 *   -ENOMEM   if the allocation fails.
 */
static int futex_hash_allocate(unsigned int hash_slots, bool custom)
{
	struct mm_struct *mm = current->mm;
	struct futex_private_hash *fph;
	unsigned int i;	/* same type as hash_slots: no signed/unsigned compare */

	if (hash_slots && (hash_slots == 1 || !is_power_of_2(hash_slots)))
		return -EINVAL;

	if (mm->futex_phash)
		return -EALREADY;

	/* The hash may only be set up while the process is single-threaded. */
	if (!thread_group_empty(current))
		return -EINVAL;

	fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (!fph)
		return -ENOMEM;

	fph->hash_mask = hash_slots ? hash_slots - 1 : 0;
	fph->custom = custom;
	fph->mm = mm;

	for (i = 0; i < hash_slots; i++)
		futex_hash_bucket_init(&fph->queues[i], fph);

	mm->futex_phash = fph;
	return 0;
}
1296+
1297+
static int futex_hash_get_slots(void)
1298+
{
1299+
struct futex_private_hash *fph;
1300+
1301+
fph = current->mm->futex_phash;
1302+
if (fph && fph->hash_mask)
1303+
return fph->hash_mask + 1;
1304+
return 0;
1305+
}
1306+
1307+
#else
1308+
1309+
/* Private hash support is compiled out: a hash can never be allocated. */
static int futex_hash_allocate(unsigned int hash_slots, bool custom)
1310+
{
1311+
return -EINVAL;
1312+
}
1313+
1314+
/* Private hash support is compiled out: always report 0 slots. */
static int futex_hash_get_slots(void)
1315+
{
1316+
return 0;
1317+
}
1318+
#endif
1319+
1320+
int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
1321+
{
1322+
int ret;
1323+
1324+
switch (arg2) {
1325+
case PR_FUTEX_HASH_SET_SLOTS:
1326+
if (arg4 != 0)
1327+
return -EINVAL;
1328+
ret = futex_hash_allocate(arg3, true);
1329+
break;
1330+
1331+
case PR_FUTEX_HASH_GET_SLOTS:
1332+
ret = futex_hash_get_slots();
1333+
break;
1334+
1335+
default:
1336+
ret = -EINVAL;
1337+
break;
1338+
}
1339+
return ret;
1340+
}
1341+
11701342
static int __init futex_init(void)
11711343
{
11721344
unsigned long hashsize, i;
@@ -1185,7 +1357,7 @@ static int __init futex_init(void)
11851357
hashsize = 1UL << futex_shift;
11861358

11871359
for (i = 0; i < hashsize; i++)
1188-
futex_hash_bucket_init(&futex_queues[i]);
1360+
futex_hash_bucket_init(&futex_queues[i], NULL);
11891361

11901362
futex_hashmask = hashsize - 1;
11911363
return 0;

0 commit comments

Comments
 (0)