Skip to content

Commit 3af722c

Browse files
chleroy authored and davem330 committed
powerpc/net: Implement powerpc specific csum_shift() to remove branch
Today's implementation of csum_shift() leads to branching based on the parity of 'offset':

    000002f8 <csum_block_add>:
         2f8:  70 a5 00 01   andi.   r5,r5,1
         2fc:  41 a2 00 08   beq     304 <csum_block_add+0xc>
         300:  54 84 c0 3e   rotlwi  r4,r4,24
         304:  7c 63 20 14   addc    r3,r3,r4
         308:  7c 63 01 94   addze   r3,r3
         30c:  4e 80 00 20   blr

Use the first bit of 'offset' directly as input of the rotation instead of branching.

    000002f8 <csum_block_add>:
         2f8:  54 a5 1f 38   rlwinm  r5,r5,3,28,28
         2fc:  20 a5 00 20   subfic  r5,r5,32
         300:  5c 84 28 3e   rotlw   r4,r4,r5
         304:  7c 63 20 14   addc    r3,r3,r4
         308:  7c 63 01 94   addze   r3,r3
         30c:  4e 80 00 20   blr

And change to a left rotate instead of a right rotate to skip one more instruction. This has no impact on the final sum: rotating left by 8 instead of right by 8 differs only by a 16-bit rotation (a swap of the two 16-bit halves), which does not change the checksum once it is folded to 16 bits.

    000002f8 <csum_block_add>:
         2f8:  54 a5 1f 38   rlwinm  r5,r5,3,28,28
         2fc:  5c 84 28 3e   rotlw   r4,r4,r5
         300:  7c 63 20 14   addc    r3,r3,r4
         304:  7c 63 01 94   addze   r3,r3
         308:  4e 80 00 20   blr

It seems that only powerpc benefits from a branchless implementation. Other main architectures like ARM or X86 get better code with the generic implementation and its branch.

Signed-off-by: Christophe Leroy <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 8ef1dc4 commit 3af722c

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

arch/powerpc/include/asm/checksum.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,13 @@ static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
112112
#endif
113113
}
114114

115+
#define HAVE_ARCH_CSUM_SHIFT
116+
static __always_inline __wsum csum_shift(__wsum sum, int offset)
117+
{
118+
/* rotate sum to align it with a 16b boundary */
119+
return (__force __wsum)rol32((__force u32)sum, (offset & 1) << 3);
120+
}
121+
115122
/*
116123
* This is a version of ip_compute_csum() optimized for IP headers,
117124
* which always checksum on 4 octet boundaries. ihl is the number

include/net/checksum.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,15 @@ static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
8080
return csum16_add(csum, ~addend);
8181
}
8282

83+
#ifndef HAVE_ARCH_CSUM_SHIFT
8384
static __always_inline __wsum csum_shift(__wsum sum, int offset)
8485
{
8586
/* rotate sum to align it with a 16b boundary */
8687
if (offset & 1)
8788
return (__force __wsum)ror32((__force u32)sum, 8);
8889
return sum;
8990
}
91+
#endif
9092

9193
static __always_inline __wsum
9294
csum_block_add(__wsum csum, __wsum csum2, int offset)

0 commit comments

Comments
 (0)