Skip to content

Commit 3e344ab

Browse files
George SpelvinSomasundaram Krishnasamy
authored andcommitted
lib/sort: avoid indirect calls to built-in swap
[ Upstream commit 8fb583c ] Similar to what's being done in the net code, this takes advantage of the fact that most invocations use only a few common swap functions, and replaces indirect calls to them with (highly predictable) conditional branches. (The downside, of course, is that if you *do* use a custom swap function, there are a few extra predicted branches on the code path.) This actually *shrinks* the x86-64 code, because it inlines the various swap functions inside do_swap, eliding function prologues & epilogues. x86-64 code size 767 -> 703 bytes (-64) Link: http://lkml.kernel.org/r/d10c5d4b393a1847f32f5b26f4bbaa2857140e1e.1552704200.git.lkml@sdf.org Signed-off-by: George Spelvin <[email protected]> Acked-by: Andrey Abramov <[email protected]> Acked-by: Rasmus Villemoes <[email protected]> Reviewed-by: Andy Shevchenko <[email protected]> Cc: Daniel Wagner <[email protected]> Cc: Dave Chinner <[email protected]> Cc: Don Mullis <[email protected]> Cc: Geert Uytterhoeven <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]> (cherry picked from commit 8fb583c) Orabug: 28894138 Signed-off-by: Thomas Tai <[email protected]> Reviewed-by: Tom Saeger <[email protected]> Signed-off-by: Somasundaram Krishnasamy <[email protected]>
1 parent cc2fbe2 commit 3e344ab

File tree

1 file changed

+36
-15
lines changed

1 file changed

+36
-15
lines changed

lib/sort.c

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,8 @@ static bool is_aligned(const void *base, size_t size, unsigned char align)
5454
* subtract (since the intervening mov instructions don't alter the flags).
5555
* Gcc 8.1.0 doesn't have that problem.
5656
*/
57-
static void swap_words_32(void *a, void *b, int size)
57+
static void swap_words_32(void *a, void *b, size_t n)
5858
{
59-
size_t n = (unsigned int)size;
60-
6159
do {
6260
u32 t = *(u32 *)(a + (n -= 4));
6361
*(u32 *)(a + n) = *(u32 *)(b + n);
@@ -80,10 +78,8 @@ static void swap_words_32(void *a, void *b, int size)
8078
* but it's possible to have 64-bit loads without 64-bit pointers (e.g.
8179
* x32 ABI). Are there any cases the kernel needs to worry about?
8280
*/
83-
static void swap_words_64(void *a, void *b, int size)
81+
static void swap_words_64(void *a, void *b, size_t n)
8482
{
85-
size_t n = (unsigned int)size;
86-
8783
do {
8884
#ifdef CONFIG_64BIT
8985
u64 t = *(u64 *)(a + (n -= 8));
@@ -109,17 +105,42 @@ static void swap_words_64(void *a, void *b, int size)
109105
*
110106
* This is the fallback if alignment doesn't allow using larger chunks.
111107
*/
112-
static void swap_bytes(void *a, void *b, int size)
108+
static void swap_bytes(void *a, void *b, size_t n)
113109
{
114-
size_t n = (unsigned int)size;
115-
116110
do {
117111
char t = ((char *)a)[--n];
118112
((char *)a)[n] = ((char *)b)[n];
119113
((char *)b)[n] = t;
120114
} while (n);
121115
}
122116

117+
typedef void (*swap_func_t)(void *a, void *b, int size);
118+
119+
/*
120+
* The values are arbitrary as long as they can't be confused with
121+
* a pointer, but small integers make for the smallest compare
122+
* instructions.
123+
*/
124+
#define SWAP_WORDS_64 (swap_func_t)0
125+
#define SWAP_WORDS_32 (swap_func_t)1
126+
#define SWAP_BYTES (swap_func_t)2
127+
128+
/*
129+
* The function pointer is last to make tail calls most efficient if the
130+
* compiler decides not to inline this function.
131+
*/
132+
static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
133+
{
134+
if (swap_func == SWAP_WORDS_64)
135+
swap_words_64(a, b, size);
136+
else if (swap_func == SWAP_WORDS_32)
137+
swap_words_32(a, b, size);
138+
else if (swap_func == SWAP_BYTES)
139+
swap_bytes(a, b, size);
140+
else
141+
swap_func(a, b, (int)size);
142+
}
143+
123144
/**
124145
* parent - given the offset of the child, find the offset of the parent.
125146
* @i: the offset of the heap element whose parent is sought. Non-zero.
@@ -157,7 +178,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
157178
* This function does a heapsort on the given array. You may provide
158179
* a swap_func function if you need to do something more than a memory
159180
* copy (e.g. fix up pointers or auxiliary data), but the built-in swap
160-
* isn't usually a bottleneck.
181+
* avoids a slow retpoline and so is significantly faster.
161182
*
162183
* Sorting time is O(n log n) both on average and worst-case. While
163184
* quicksort is slightly faster on average, it suffers from exploitable
@@ -177,11 +198,11 @@ void sort(void *base, size_t num, size_t size,
177198

178199
if (!swap_func) {
179200
if (is_aligned(base, size, 8))
180-
swap_func = swap_words_64;
201+
swap_func = SWAP_WORDS_64;
181202
else if (is_aligned(base, size, 4))
182-
swap_func = swap_words_32;
203+
swap_func = SWAP_WORDS_32;
183204
else
184-
swap_func = swap_bytes;
205+
swap_func = SWAP_BYTES;
185206
}
186207

187208
/*
@@ -197,7 +218,7 @@ void sort(void *base, size_t num, size_t size,
197218
if (a) /* Building heap: sift down --a */
198219
a -= size;
199220
else if (n -= size) /* Sorting: Extract root to --n */
200-
swap_func(base, base + n, size);
221+
do_swap(base, base + n, size, swap_func);
201222
else /* Sort complete */
202223
break;
203224

@@ -224,7 +245,7 @@ void sort(void *base, size_t num, size_t size,
224245
c = b; /* Where "a" belongs */
225246
while (b != a) { /* Shift it into place */
226247
b = parent(b, lsbit, size);
227-
swap_func(base + b, base + c, size);
248+
do_swap(base + b, base + c, size, swap_func);
228249
}
229250
}
230251
}

0 commit comments

Comments
 (0)