Skip to content

Commit f68e556

Browse files
committed
Make the "word-at-a-time" helper functions more commonly usable
I have a new optimized x86 "strncpy_from_user()" that will use these same helper functions for all the same reasons the name lookup code uses them. This is preparation for that. This moves them into an architecture-specific header file. It's architecture-specific for two reasons: - some of the functions are likely to want architecture-specific implementations. Even if the current code happens to be "generic" in the sense that it should work on any little-endian machine, it's likely that the "multiply by a big constant and shift" implementation is less than optimal for an architecture that has a guaranteed fast bit count instruction, for example. - I expect that if architectures like sparc want to start playing around with this, we'll need to abstract out a few more details (in particular the actual unaligned accesses). So we're likely to have more architecture-specific stuff if non-x86 architectures start using this. (and if it turns out that non-x86 architectures don't start using this, then having it in an architecture-specific header is still the right thing to do, of course) Signed-off-by: Linus Torvalds <[email protected]>
1 parent 23f347e commit f68e556

File tree

2 files changed

+49
-32
lines changed

2 files changed

+49
-32
lines changed

arch/x86/include/asm/word-at-a-time.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#ifndef _ASM_WORD_AT_A_TIME_H
2+
#define _ASM_WORD_AT_A_TIME_H
3+
4+
/*
5+
* This is largely generic for little-endian machines, but the
6+
* optimal byte mask counting is probably going to be something
7+
* that is architecture-specific. If you have a reliably fast
8+
* bit count instruction, that might be better than the multiply
9+
* and shift, for example.
10+
*/
11+
12+
#ifdef CONFIG_64BIT
13+
14+
/*
15+
* Jan Achrenius on G+: microoptimized version of
16+
* the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
17+
* that works for the bytemasks without having to
18+
* mask them first.
19+
*/
20+
static inline long count_masked_bytes(unsigned long mask)
21+
{
22+
return mask*0x0001020304050608ul >> 56;
23+
}
24+
25+
#else /* 32-bit case */
26+
27+
/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
28+
static inline long count_masked_bytes(long mask)
29+
{
30+
/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
31+
long a = (0x0ff0001+mask) >> 23;
32+
/* Fix the 1 for 00 case */
33+
return a & mask;
34+
}
35+
36+
#endif
37+
38+
#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
39+
40+
/* Return the high bit set in the first byte that is a zero */
41+
static inline unsigned long has_zero(unsigned long a)
42+
{
43+
return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
44+
}
45+
46+
#endif /* _ASM_WORD_AT_A_TIME_H */

fs/namei.c

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,18 +1407,9 @@ static inline int can_lookup(struct inode *inode)
14071407
*/
14081408
#ifdef CONFIG_DCACHE_WORD_ACCESS
14091409

1410-
#ifdef CONFIG_64BIT
1410+
#include <asm/word-at-a-time.h>
14111411

1412-
/*
1413-
* Jan Achrenius on G+: microoptimized version of
1414-
* the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
1415-
* that works for the bytemasks without having to
1416-
* mask them first.
1417-
*/
1418-
static inline long count_masked_bytes(unsigned long mask)
1419-
{
1420-
return mask*0x0001020304050608ul >> 56;
1421-
}
1412+
#ifdef CONFIG_64BIT
14221413

14231414
static inline unsigned int fold_hash(unsigned long hash)
14241415
{
@@ -1428,15 +1419,6 @@ static inline unsigned int fold_hash(unsigned long hash)
14281419

14291420
#else /* 32-bit case */
14301421

1431-
/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
1432-
static inline long count_masked_bytes(long mask)
1433-
{
1434-
/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
1435-
long a = (0x0ff0001+mask) >> 23;
1436-
/* Fix the 1 for 00 case */
1437-
return a & mask;
1438-
}
1439-
14401422
#define fold_hash(x) (x)
14411423

14421424
#endif
@@ -1464,17 +1446,6 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
14641446
}
14651447
EXPORT_SYMBOL(full_name_hash);
14661448

1467-
#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
1468-
#define ONEBYTES REPEAT_BYTE(0x01)
1469-
#define SLASHBYTES REPEAT_BYTE('/')
1470-
#define HIGHBITS REPEAT_BYTE(0x80)
1471-
1472-
/* Return the high bit set in the first byte that is a zero */
1473-
static inline unsigned long has_zero(unsigned long a)
1474-
{
1475-
return ((a - ONEBYTES) & ~a) & HIGHBITS;
1476-
}
1477-
14781449
/*
14791450
* Calculate the length and hash of the path component, and
14801451
* return the length of the component;
@@ -1490,7 +1461,7 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
14901461
len += sizeof(unsigned long);
14911462
a = *(unsigned long *)(name+len);
14921463
/* Do we have any NUL or '/' bytes in this word? */
1493-
mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
1464+
mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/'));
14941465
} while (!mask);
14951466

14961467
/* The mask *below* the first high bit set */

0 commit comments

Comments
 (0)