// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include "rseq.h"
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

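/*
 * The per-cpu entries below are aligned to 128 bytes so that each
 * cpu's entry sits on its own cache line(s); the intent is presumably
 * to avoid false sharing when different cpus update their own entries
 * concurrently (128 bytes covers two typical 64-byte lines).
 */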
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

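/*
 * Per-cpu linked list: each cpu owns an independent LIFO list of
 * nodes chained through ->next, so pushes and pops on one cpu never
 * touch another cpu's head pointer.
 */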
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu the lock was acquired on. */
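/*
 * rseq_cmpeqv_storev() runs "if (*v == expect) *v = newval" as a
 * restartable sequence bound to @cpu. In the rseq.h shipped with this
 * test it returns 0 on success, a positive value when the comparison
 * fails, and a negative value when the sequence is aborted (e.g. by
 * preemption, migration or signal delivery), so the caller retries.
 */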
int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}

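/*
 * Push @node onto the current cpu's list: point node->next at the
 * currently observed head, then publish @node as the new head with
 * rseq_cmpeqv_storev(). The store only commits if the head is still
 * the value we read and the thread is still on @cpu, so a concurrent
 * push/pop or a migration simply causes a retry with fresh values.
 */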
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
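/*
 * rseq_cmpnev_storeoffp_load() does the real work here: as provided by
 * this test's rseq.h, it checks that *targetptr is not @expectnot
 * (i.e. the list is non-empty), copies the old head into *load, and
 * replaces the head with *(head + offset) -- the node's ->next
 * pointer -- all within one restartable sequence on @cpu. A positive
 * return means the list was empty; a negative return means the
 * sequence was aborted and should be retried.
 */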
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret, cpu;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

int main(int argc, char **argv)
{
	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	return 0;

error:
	return -1;
}