Skip to content

Commit 751ba79

Browse files
m-browmpe
authored and committed
lib/raid6/altivec: Add vpermxor implementation for raid6 Q syndrome
This patch uses the vpermxor instruction to optimise the raid6 Q syndrome. This instruction was made available with POWER8, ISA version 2.07. It allows for both vperm and vxor instructions to be done in a single instruction.

This has been tested for correctness on a ppc64le vm with a basic RAID6 setup containing 5 drives.

The performance benchmarks are from the raid6test in the /lib/raid6/test directory. These results are from an IBM Firestone machine with ppc64le architecture. The benchmark results show a 35% speed increase over the best existing algorithm for powerpc (altivec). The raid6test has also been run on a big-endian ppc64 vm to ensure it also works for big-endian architectures.

Performance benchmarks:
  raid6: altivecx4 gen() 18773 MB/s
  raid6: altivecx8 gen() 19438 MB/s
  raid6: vpermxor4 gen() 25112 MB/s
  raid6: vpermxor8 gen() 26279 MB/s

Signed-off-by: Matt Brown <[email protected]>
Reviewed-by: Daniel Axtens <[email protected]>
[mpe: Add VPERMXOR macro so we can build with old binutils]
Signed-off-by: Michael Ellerman <[email protected]>
1 parent 7004263 commit 751ba79

File tree

7 files changed

+161
-3
lines changed

7 files changed

+161
-3
lines changed

arch/powerpc/include/asm/ppc-opcode.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@
271271
#define PPC_INST_TLBSRX_DOT 0x7c0006a5
272272
#define PPC_INST_VPMSUMW 0x10000488
273273
#define PPC_INST_VPMSUMD 0x100004c8
274+
#define PPC_INST_VPERMXOR 0x1000002d
274275
#define PPC_INST_XXLOR 0xf0000490
275276
#define PPC_INST_XXSWAPD 0xf0000250
276277
#define PPC_INST_XVCPSGNDP 0xf0000780
@@ -517,6 +518,11 @@
517518
#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
518519
VSX_XX3((t), (a), (b))))
519520

521+
/*
 * Emit a vpermxor instruction as a raw .long word so the file still
 * assembles with binutils that do not know the mnemonic.
 *
 * vrt, vra and vrb go through the usual ___PPC_RT/RA/RB field helpers;
 * vrc is masked to 5 bits and placed at bit 6 of the instruction word.
 */
#define VPERMXOR(vrt, vra, vrb, vrc)				\
	stringify_in_c(.long (PPC_INST_VPERMXOR |		\
			      ___PPC_RT(vrt) |			\
			      ___PPC_RA(vra) |			\
			      ___PPC_RB(vrb) |			\
			      (((vrc) & 0x1f) << 6)))
525+
520526
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
521527
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
522528
#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE)

include/linux/raid/pq.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ extern const struct raid6_calls raid6_avx512x2;
107107
extern const struct raid6_calls raid6_avx512x4;
108108
extern const struct raid6_calls raid6_tilegx8;
109109
extern const struct raid6_calls raid6_s390vx8;
110+
extern const struct raid6_calls raid6_vpermxor1;
111+
extern const struct raid6_calls raid6_vpermxor2;
112+
extern const struct raid6_calls raid6_vpermxor4;
113+
extern const struct raid6_calls raid6_vpermxor8;
110114

111115
struct raid6_recov_calls {
112116
void (*data2)(int, size_t, int, int, void **);

lib/raid6/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ int*.c
44
tables.c
55
neon?.c
66
s390vx?.c
7+
vpermxor*.c

lib/raid6/Makefile

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
55
int8.o int16.o int32.o
66

77
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
8-
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
8+
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
9+
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
910
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
1011
raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
1112
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
@@ -91,6 +92,30 @@ $(obj)/altivec8.c: UNROLL := 8
9192
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
9293
$(call if_changed,unroll)
9394

95+
# vpermxor{1,2,4,8}.c are generated from the vpermxor.uc template and
# unroll.awk through the 'unroll' command, with UNROLL set per target to
# the unroll factor. Each resulting object is compiled with
# $(altivec_flags). The four stanzas below differ only in that factor.
CFLAGS_vpermxor1.o += $(altivec_flags)
96+
targets += vpermxor1.c
97+
$(obj)/vpermxor1.c: UNROLL := 1
98+
$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
99+
$(call if_changed,unroll)
100+
101+
CFLAGS_vpermxor2.o += $(altivec_flags)
102+
targets += vpermxor2.c
103+
$(obj)/vpermxor2.c: UNROLL := 2
104+
$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
105+
$(call if_changed,unroll)
106+
107+
CFLAGS_vpermxor4.o += $(altivec_flags)
108+
targets += vpermxor4.c
109+
$(obj)/vpermxor4.c: UNROLL := 4
110+
$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
111+
$(call if_changed,unroll)
112+
113+
CFLAGS_vpermxor8.o += $(altivec_flags)
114+
targets += vpermxor8.c
115+
$(obj)/vpermxor8.c: UNROLL := 8
116+
$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
117+
$(call if_changed,unroll)
118+
94119
CFLAGS_neon1.o += $(NEON_FLAGS)
95120
targets += neon1.c
96121
$(obj)/neon1.c: UNROLL := 1

lib/raid6/algos.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
7474
&raid6_altivec2,
7575
&raid6_altivec4,
7676
&raid6_altivec8,
77+
&raid6_vpermxor1,
78+
&raid6_vpermxor2,
79+
&raid6_vpermxor4,
80+
&raid6_vpermxor8,
7781
#endif
7882
#if defined(CONFIG_TILEGX)
7983
&raid6_tilegx8,

lib/raid6/test/Makefile

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ else
4848
gcc -c -x c - >&/dev/null && \
4949
rm ./-.o && echo yes)
5050
ifeq ($(HAS_ALTIVEC),yes)
51-
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
51+
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
52+
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
5253
endif
5354
endif
5455
ifeq ($(ARCH),tilegx)
@@ -98,6 +99,18 @@ altivec4.c: altivec.uc ../unroll.awk
9899
altivec8.c: altivec.uc ../unroll.awk
99100
$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
100101

102+
# User-space test build: generate vpermxor{1,2,4,8}.c by running
# ../unroll.awk over vpermxor.uc with N set to the unroll factor.
vpermxor1.c: vpermxor.uc ../unroll.awk
103+
$(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
104+
105+
vpermxor2.c: vpermxor.uc ../unroll.awk
106+
$(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
107+
108+
vpermxor4.c: vpermxor.uc ../unroll.awk
109+
$(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
110+
111+
vpermxor8.c: vpermxor.uc ../unroll.awk
112+
$(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
113+
101114
int1.c: int.uc ../unroll.awk
102115
$(AWK) ../unroll.awk -vN=1 < int.uc > $@
103116

@@ -123,7 +136,7 @@ tables.c: mktables
123136
./mktables > tables.c
124137

125138
clean:
126-
rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
139+
rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
127140
rm -f tilegx*.c
128141

129142
spotless: clean

lib/raid6/vpermxor.uc

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
* Copyright 2017, Matt Brown, IBM Corp.
3+
*
4+
* This program is free software; you can redistribute it and/or
5+
* modify it under the terms of the GNU General Public License
6+
* as published by the Free Software Foundation; either version
7+
* 2 of the License, or (at your option) any later version.
8+
*
9+
* vpermxor$#.c
10+
*
11+
* Based on H. Peter Anvin's paper - The mathematics of RAID-6
12+
*
13+
 * $#-way unrolled vpermxor (POWER8 Altivec) RAID-6 instruction set
14+
* This file is postprocessed using unroll.awk
15+
*
16+
* vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q
17+
* syndrome calculations.
18+
 * This can be run on systems which have both Altivec and the vpermxor instruction.
19+
*
20+
* This instruction was introduced in POWER8 - ISA v2.07.
21+
*/
22+
23+
#include <linux/raid/pq.h>
24+
#ifdef CONFIG_ALTIVEC
25+
26+
#include <altivec.h>
27+
#ifdef __KERNEL__
28+
#include <asm/cputable.h>
29+
#include <asm/ppc-opcode.h>
30+
#include <asm/switch_to.h>
31+
#endif
32+
33+
typedef vector unsigned char unative_t;
34+
#define NSIZE sizeof(unative_t)
35+
36+
/*
 * Lookup tables passed to vpermxor as its two table operands when
 * updating the Q syndrome.
 *
 * gf_low holds 2 * n for n = 15 down to 0.
 * NOTE(review): gf_high looks like the GF(2^8) multiply-by-2 of (n << 4)
 * for n = 15 down to 0 (reduction polynomial 0x11d) -- confirm, together
 * with the descending element order, against the Power ISA definition of
 * vpermxor.
 */
static const vector unsigned char gf_low = {
	0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 0x12, 0x10,
	0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02, 0x00
};
static const vector unsigned char gf_high = {
	0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d, 0x3d, 0x1d,
	0xe0, 0xc0, 0xa0, 0x80, 0x60, 0x40, 0x20, 0x00
};
42+
43+
/*
 * Compute the P (XOR parity) and Q (RS syndrome) blocks for one stripe,
 * handling $# vector(s) of NSIZE bytes per loop pass.
 *
 * @disks: number of entries in @ptrs; the last two are the P and Q
 *         destination blocks, everything before them is a data block.
 * @bytes: length in bytes of every block.
 * @ptrs:  block pointers: data blocks first, then P, then Q.
 *
 * NOTE(review): the loop has no tail handling, so @bytes is assumed to be
 * a multiple of NSIZE * $#, and the vector loads/stores assume suitably
 * aligned blocks -- confirm against the callers.
 * NOTE(review): every line carrying the per-lane placeholder has to stay a
 * complete single-line statement; unroll.awk is expected to replicate such
 * lines once per unrolled lane.
 */
static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
							void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	size_t d;	/* byte offset; size_t like bytes, so it cannot wrap as an int */
	int z, z0;
	unative_t wp$$, wq$$, wd$$;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	for (d = 0; d < bytes; d += NSIZE*$#) {
		/* Seed both accumulators with the highest data disk. */
		wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];

		/* Fold in the remaining data disks, highest to lowest. */
		for (z = z0-1; z >= 0; z--) {
			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			/* P syndrome */
			wp$$ = vec_xor(wp$$, wd$$);

			/*
			 * Q syndrome: run the running Q value through the
			 * gf_high/gf_low lookup with a single vpermxor, then
			 * add (XOR) this disk's data.
			 * NOTE(review): operand roles follow the Power ISA
			 * description of vpermxor -- confirm before reordering.
			 */
			asm(VPERMXOR(%0,%1,%2,%3) : "=v"(wq$$) : "v"(gf_high), "v"(gf_low), "v"(wq$$));
			wq$$ = vec_xor(wq$$, wd$$);
		}
		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
	}
}
71+
72+
/*
 * gen_syndrome entry point for the $#-way vpermxor implementation.
 *
 * Wraps the real worker: preemption is disabled and kernel Altivec use
 * is enabled before the call, and both are undone in reverse order
 * afterwards. The arguments are passed through unchanged.
 */
static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes,
					  void **ptrs)
{
	/* Enter the Altivec-enabled, non-preemptible section. */
	preempt_disable();
	enable_kernel_altivec();

	raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);

	/* Leave it again, in reverse order. */
	disable_kernel_altivec();
	preempt_enable();
}
82+
83+
/*
 * Availability check shared by every unrolled variant. The prototype is
 * emitted in each generated file, but the body only in the 1-way file.
 *
 * Returns non-zero when the implementation may be used: in the kernel
 * that requires both the Altivec and the ISA 2.07 CPU feature bits; the
 * user-space build always reports it as available.
 */
int raid6_have_altivec_vpermxor(void);
#if $# == 1
int raid6_have_altivec_vpermxor(void)
{
# ifdef __KERNEL__
	/* Need Altivec as well as the vpermxor instruction itself. */
	return cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
	       cpu_has_feature(CPU_FTR_ARCH_207S);
# else
	return 1;
# endif
}
#endif
97+
98+
/*
 * Descriptor for the $#-way vpermxor implementation.
 * NOTE(review): the initializer is positional; slot meanings below are
 * inferred from the values -- confirm against struct raid6_calls in pq.h.
 */
const struct raid6_calls raid6_vpermxor$# = {
	raid6_vpermxor$#_gen_syndrome,	/* syndrome generator */
	NULL,				/* unused here; presumably an optional hook */
	raid6_have_altivec_vpermxor,	/* availability check */
	"vpermxor$#",			/* algorithm name */
	0				/* presumably a preference flag */
};
105+
#endif

0 commit comments

Comments
 (0)