Skip to content

Commit f8a6e48

Browse files
committed
Merge local branch 'x86-codegen'
Merge trivial x86 code generation annoyances

 - Introduce helper macros for clang asm input problems

 - use said macros to improve trivially stupid code generation issues in
   bitops and array_index_mask_nospec

 - also improve codegen with 32-bit array index comparisons

None of these really matter, but I look at code generation and profiles
fairly regularly, and these misfeatures caused the generated code to
look really odd and distract from the real issues.

* branch 'x86-codegen' of local tree:
  x86: improve bitop code generation with clang
  x86: improve array_index_mask_nospec() code generation
  clang: work around asm input constraint problems
2 parents 5f16eb0 + b9b60b3 commit f8a6e48

File tree

4 files changed

+34
-19
lines changed

4 files changed

+34
-19
lines changed

arch/x86/include/asm/barrier.h

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,16 @@
3333
* Returns:
3434
* 0 - (index < size)
3535
*/
36-
static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
37-
unsigned long size)
38-
{
39-
unsigned long mask;
40-
41-
asm volatile ("cmp %1,%2; sbb %0,%0;"
42-
:"=r" (mask)
43-
:"g"(size),"r" (index)
44-
:"cc");
45-
return mask;
46-
}
47-
48-
/* Override the default implementation from linux/nospec.h. */
49-
#define array_index_mask_nospec array_index_mask_nospec
36+
#define array_index_mask_nospec(idx,sz) ({ \
37+
typeof((idx)+(sz)) __idx = (idx); \
38+
typeof(__idx) __sz = (sz); \
39+
unsigned long __mask; \
40+
asm volatile ("cmp %1,%2; sbb %0,%0" \
41+
:"=r" (__mask) \
42+
:ASM_INPUT_G (__sz), \
43+
"r" (__idx) \
44+
:"cc"); \
45+
__mask; })
5046

5147
/* Prevent speculative execution past this barrier. */
5248
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)

arch/x86/include/asm/bitops.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
250250
{
251251
asm("rep; bsf %1,%0"
252252
: "=r" (word)
253-
: "rm" (word));
253+
: ASM_INPUT_RM (word));
254254
return word;
255255
}
256256

@@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word)
297297

298298
asm("bsr %1,%0"
299299
: "=r" (word)
300-
: "rm" (word));
300+
: ASM_INPUT_RM (word));
301301
return word;
302302
}
303303

@@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x)
320320
*/
321321
asm("bsfl %1,%0"
322322
: "=r" (r)
323-
: "rm" (x), "0" (-1));
323+
: ASM_INPUT_RM (x), "0" (-1));
324324
#elif defined(CONFIG_X86_CMOV)
325325
asm("bsfl %1,%0\n\t"
326326
"cmovzl %2,%0"
@@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x)
377377
*/
378378
asm("bsrl %1,%0"
379379
: "=r" (r)
380-
: "rm" (x), "0" (-1));
380+
: ASM_INPUT_RM (x), "0" (-1));
381381
#elif defined(CONFIG_X86_CMOV)
382382
asm("bsrl %1,%0\n\t"
383383
"cmovzl %2,%0"
@@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x)
416416
*/
417417
asm("bsrq %1,%q0"
418418
: "+r" (bitpos)
419-
: "rm" (x));
419+
: ASM_INPUT_RM (x));
420420
return bitpos + 1;
421421
}
422422
#else

include/linux/compiler-clang.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,13 @@
118118

119119
#define __diag_ignore_all(option, comment) \
120120
__diag_clang(13, ignore, option)
121+
122+
/*
123+
* clang has horrible behavior with "g" or "rm" constraints for asm
124+
* inputs, turning them into something worse than "m". Avoid using
125+
* constraints with multiple possible uses (but "ir" seems to be ok):
126+
*
127+
* https://github.com/llvm/llvm-project/issues/20571
128+
*/
129+
#define ASM_INPUT_G "ir"
130+
#define ASM_INPUT_RM "r"

include/linux/compiler_types.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,15 @@ struct ftrace_likely_data {
409409
#define asm_goto_output(x...) asm volatile goto(x)
410410
#endif
411411

412+
/*
413+
* Clang has trouble with constraints with multiple
414+
* alternative behaviors (mainly "g" and "rm").
415+
*/
416+
#ifndef ASM_INPUT_G
417+
#define ASM_INPUT_G "g"
418+
#define ASM_INPUT_RM "rm"
419+
#endif
420+
412421
#ifdef CONFIG_CC_HAS_ASM_INLINE
413422
#define asm_inline asm __inline
414423
#else

0 commit comments

Comments
 (0)