Skip to content

Commit cc7cf99

Browse files
ebiggers authored and herbertx committed
crypto: arm64/chacha20 - add XChaCha20 support
Add an XChaCha20 implementation that is hooked up to the ARM64 NEON implementation of ChaCha20. This can be used by Adiantum.

A NEON implementation of single-block HChaCha20 is also added so that XChaCha20 can use it rather than the generic implementation. This required refactoring the ChaCha20 permutation into its own function.

Signed-off-by: Eric Biggers <[email protected]>
Reviewed-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent a00fa0c commit cc7cf99

File tree

3 files changed

+125
-43
lines changed

3 files changed

+125
-43
lines changed

arch/arm64/crypto/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ config CRYPTO_AES_ARM64_NEON_BLK
101101
select CRYPTO_SIMD
102102

103103
config CRYPTO_CHACHA20_NEON
104-
tristate "NEON accelerated ChaCha20 symmetric cipher"
104+
tristate "ChaCha20 and XChaCha20 stream ciphers using NEON instructions"
105105
depends on KERNEL_MODE_NEON
106106
select CRYPTO_BLKCIPHER
107107
select CRYPTO_CHACHA20

arch/arm64/crypto/chacha20-neon-core.S

Lines changed: 48 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -23,25 +23,20 @@
2323
.text
2424
.align 6
2525

26-
ENTRY(chacha20_block_xor_neon)
27-
// x0: Input state matrix, s
28-
// x1: 1 data block output, o
29-
// x2: 1 data block input, i
30-
31-
//
32-
// This function encrypts one ChaCha20 block by loading the state matrix
33-
// in four NEON registers. It performs matrix operation on four words in
34-
// parallel, but requires shuffling to rearrange the words after each
35-
// round.
36-
//
37-
38-
// x0..3 = s0..3
39-
adr x3, ROT8
40-
ld1 {v0.4s-v3.4s}, [x0]
41-
ld1 {v8.4s-v11.4s}, [x0]
42-
ld1 {v12.4s}, [x3]
26+
/*
27+
* chacha20_permute - permute one block
28+
*
29+
* Permute one 64-byte block where the state matrix is stored in the four NEON
30+
* registers v0-v3. It performs matrix operations on four words in parallel,
31+
* but requires shuffling to rearrange the words after each round.
32+
*
33+
* Clobbers: x3, x10, v4, v12
34+
*/
35+
chacha20_permute:
4336

4437
mov x3, #10
38+
adr x10, ROT8
39+
ld1 {v12.4s}, [x10]
4540

4641
.Ldoubleround:
4742
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -105,6 +100,23 @@ ENTRY(chacha20_block_xor_neon)
105100
subs x3, x3, #1
106101
b.ne .Ldoubleround
107102

103+
ret
104+
ENDPROC(chacha20_permute)
105+
106+
ENTRY(chacha20_block_xor_neon)
107+
// x0: Input state matrix, s
108+
// x1: 1 data block output, o
109+
// x2: 1 data block input, i
110+
111+
stp x29, x30, [sp, #-16]!
112+
mov x29, sp
113+
114+
// x0..3 = s0..3
115+
ld1 {v0.4s-v3.4s}, [x0]
116+
ld1 {v8.4s-v11.4s}, [x0]
117+
118+
bl chacha20_permute
119+
108120
ld1 {v4.16b-v7.16b}, [x2]
109121

110122
// o0 = i0 ^ (x0 + s0)
@@ -125,9 +137,28 @@ ENTRY(chacha20_block_xor_neon)
125137

126138
st1 {v0.16b-v3.16b}, [x1]
127139

140+
ldp x29, x30, [sp], #16
128141
ret
129142
ENDPROC(chacha20_block_xor_neon)
130143

144+
ENTRY(hchacha20_block_neon)
145+
// x0: Input state matrix, s
146+
// x1: output (8 32-bit words)
147+
148+
stp x29, x30, [sp, #-16]!
149+
mov x29, sp
150+
151+
ld1 {v0.4s-v3.4s}, [x0]
152+
153+
bl chacha20_permute
154+
155+
st1 {v0.16b}, [x1], #16
156+
st1 {v3.16b}, [x1]
157+
158+
ldp x29, x30, [sp], #16
159+
ret
160+
ENDPROC(hchacha20_block_neon)
161+
131162
.align 6
132163
ENTRY(chacha20_4block_xor_neon)
133164
// x0: Input state matrix, s

arch/arm64/crypto/chacha20-neon-glue.c

Lines changed: 76 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030

3131
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
3232
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
33+
asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out);
3334

3435
static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
3536
unsigned int bytes)
@@ -65,20 +66,16 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
6566
kernel_neon_end();
6667
}
6768

68-
static int chacha20_neon(struct skcipher_request *req)
69+
static int chacha20_neon_stream_xor(struct skcipher_request *req,
70+
struct chacha_ctx *ctx, u8 *iv)
6971
{
70-
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
71-
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
7272
struct skcipher_walk walk;
7373
u32 state[16];
7474
int err;
7575

76-
if (!may_use_simd() || req->cryptlen <= CHACHA_BLOCK_SIZE)
77-
return crypto_chacha_crypt(req);
78-
7976
err = skcipher_walk_virt(&walk, req, false);
8077

81-
crypto_chacha_init(state, ctx, walk.iv);
78+
crypto_chacha_init(state, ctx, iv);
8279

8380
while (walk.nbytes > 0) {
8481
unsigned int nbytes = walk.nbytes;
@@ -94,35 +91,86 @@ static int chacha20_neon(struct skcipher_request *req)
9491
return err;
9592
}
9693

97-
static struct skcipher_alg alg = {
98-
.base.cra_name = "chacha20",
99-
.base.cra_driver_name = "chacha20-neon",
100-
.base.cra_priority = 300,
101-
.base.cra_blocksize = 1,
102-
.base.cra_ctxsize = sizeof(struct chacha_ctx),
103-
.base.cra_module = THIS_MODULE,
104-
105-
.min_keysize = CHACHA_KEY_SIZE,
106-
.max_keysize = CHACHA_KEY_SIZE,
107-
.ivsize = CHACHA_IV_SIZE,
108-
.chunksize = CHACHA_BLOCK_SIZE,
109-
.walksize = 4 * CHACHA_BLOCK_SIZE,
110-
.setkey = crypto_chacha20_setkey,
111-
.encrypt = chacha20_neon,
112-
.decrypt = chacha20_neon,
94+
static int chacha20_neon(struct skcipher_request *req)
95+
{
96+
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
97+
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
98+
99+
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
100+
return crypto_chacha_crypt(req);
101+
102+
return chacha20_neon_stream_xor(req, ctx, req->iv);
103+
}
104+
105+
static int xchacha20_neon(struct skcipher_request *req)
106+
{
107+
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
108+
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
109+
struct chacha_ctx subctx;
110+
u32 state[16];
111+
u8 real_iv[16];
112+
113+
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
114+
return crypto_xchacha_crypt(req);
115+
116+
crypto_chacha_init(state, ctx, req->iv);
117+
118+
kernel_neon_begin();
119+
hchacha20_block_neon(state, subctx.key);
120+
kernel_neon_end();
121+
122+
memcpy(&real_iv[0], req->iv + 24, 8);
123+
memcpy(&real_iv[8], req->iv + 16, 8);
124+
return chacha20_neon_stream_xor(req, &subctx, real_iv);
125+
}
126+
127+
static struct skcipher_alg algs[] = {
128+
{
129+
.base.cra_name = "chacha20",
130+
.base.cra_driver_name = "chacha20-neon",
131+
.base.cra_priority = 300,
132+
.base.cra_blocksize = 1,
133+
.base.cra_ctxsize = sizeof(struct chacha_ctx),
134+
.base.cra_module = THIS_MODULE,
135+
136+
.min_keysize = CHACHA_KEY_SIZE,
137+
.max_keysize = CHACHA_KEY_SIZE,
138+
.ivsize = CHACHA_IV_SIZE,
139+
.chunksize = CHACHA_BLOCK_SIZE,
140+
.walksize = 4 * CHACHA_BLOCK_SIZE,
141+
.setkey = crypto_chacha20_setkey,
142+
.encrypt = chacha20_neon,
143+
.decrypt = chacha20_neon,
144+
}, {
145+
.base.cra_name = "xchacha20",
146+
.base.cra_driver_name = "xchacha20-neon",
147+
.base.cra_priority = 300,
148+
.base.cra_blocksize = 1,
149+
.base.cra_ctxsize = sizeof(struct chacha_ctx),
150+
.base.cra_module = THIS_MODULE,
151+
152+
.min_keysize = CHACHA_KEY_SIZE,
153+
.max_keysize = CHACHA_KEY_SIZE,
154+
.ivsize = XCHACHA_IV_SIZE,
155+
.chunksize = CHACHA_BLOCK_SIZE,
156+
.walksize = 4 * CHACHA_BLOCK_SIZE,
157+
.setkey = crypto_chacha20_setkey,
158+
.encrypt = xchacha20_neon,
159+
.decrypt = xchacha20_neon,
160+
}
113161
};
114162

115163
static int __init chacha20_simd_mod_init(void)
116164
{
117165
if (!(elf_hwcap & HWCAP_ASIMD))
118166
return -ENODEV;
119167

120-
return crypto_register_skcipher(&alg);
168+
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
121169
}
122170

123171
static void __exit chacha20_simd_mod_fini(void)
124172
{
125-
crypto_unregister_skcipher(&alg);
173+
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
126174
}
127175

128176
module_init(chacha20_simd_mod_init);
@@ -131,3 +179,6 @@ module_exit(chacha20_simd_mod_fini);
131179
MODULE_AUTHOR("Ard Biesheuvel <[email protected]>");
132180
MODULE_LICENSE("GPL v2");
133181
MODULE_ALIAS_CRYPTO("chacha20");
182+
MODULE_ALIAS_CRYPTO("chacha20-neon");
183+
MODULE_ALIAS_CRYPTO("xchacha20");
184+
MODULE_ALIAS_CRYPTO("xchacha20-neon");

0 commit comments

Comments
 (0)