Skip to content

Commit ad970e8

Browse files
methane, Erlend Egeberg Aasland, tiran
authored
bpo-29410: Change the default hash algorithm to SipHash13. (GH-28752)
Co-authored-by: Erlend Egeberg Aasland <[email protected]> Co-authored-by: Christian Heimes <[email protected]>
1 parent a1c3c9e commit ad970e8

File tree

11 files changed

+123
-26
lines changed

11 files changed

+123
-26
lines changed

Doc/using/configure.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -416,15 +416,19 @@ Libraries options
416416
Security Options
417417
----------------
418418

419-
.. cmdoption:: --with-hash-algorithm=[fnv|siphash24]
419+
.. cmdoption:: --with-hash-algorithm=[fnv|siphash13|siphash24]
420420

421421
Select hash algorithm for use in ``Python/pyhash.c``:
422422

423-
* ``siphash24`` (default).
424-
* ``fnv``;
423+
* ``siphash13`` (default);
424+
* ``siphash24``;
425+
* ``fnv``.
425426

426427
.. versionadded:: 3.4
427428

429+
.. versionchanged:: 3.11
430+
``siphash13`` is added and it is the new default.
431+
428432
.. cmdoption:: --with-builtin-hashlib-hashes=md5,sha1,sha256,sha512,sha3,blake2
429433

430434
Built-in hash modules:

Doc/whatsnew/3.11.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,11 @@ Other CPython Implementation Changes
175175
support :class:`typing.SupportsComplex` and :class:`typing.SupportsBytes` protocols.
176176
(Contributed by Mark Dickinson and Dong-hee Na in :issue:`24234`.)
177177

178+
* ``siphash13`` is added as a new internal hashing algorithm. It has similar security
179+
properties to ``siphash24``, but it is slightly faster for long inputs. ``str``, ``bytes``,
180+
and some other types now use it as the default algorithm for ``hash()``. :pep:`552`
181+
hash-based pyc files now use ``siphash13``, too.
182+
(Contributed by Inada Naoki in :issue:`29410`.)
178183

179184
New Modules
180185
===========

Include/pyhash.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,10 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
114114

115115
/* hash algorithm selection
116116
*
117-
* The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
117+
* The values for Py_HASH_* are hard-coded in the
118118
* configure script.
119119
*
120-
* - FNV is available on all platforms and architectures.
121-
* - SIPHASH24 only works on platforms that don't require aligned memory for integers.
120+
* - FNV and SIPHASH* are available on all platforms and architectures.
122121
* - With EXTERNAL embedders can provide an alternative implementation with::
123122
*
124123
* PyHash_FuncDef PyHash_Func = {...};
@@ -128,10 +127,11 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
128127
#define Py_HASH_EXTERNAL 0
129128
#define Py_HASH_SIPHASH24 1
130129
#define Py_HASH_FNV 2
130+
#define Py_HASH_SIPHASH13 3
131131

132132
#ifndef Py_HASH_ALGORITHM
133133
# ifndef HAVE_ALIGNED_REQUIRED
134-
# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
134+
# define Py_HASH_ALGORITHM Py_HASH_SIPHASH13
135135
# else
136136
# define Py_HASH_ALGORITHM Py_HASH_FNV
137137
# endif /* uint64_t && uint32_t && aligned */

Lib/test/test_hash.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ def pysiphash(uint64):
4242

4343
def skip_unless_internalhash(test):
4444
"""Skip decorator for tests that depend on SipHash13, SipHash24 or FNV"""
45-
ok = sys.hash_info.algorithm in {"fnv", "siphash24"}
46-
msg = "Requires SipHash24 or FNV"
45+
ok = sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"}
46+
msg = "Requires SipHash13, SipHash24 or FNV"
4747
return test if ok else unittest.skip(msg)(test)
4848

4949

@@ -206,6 +206,19 @@ class StringlikeHashRandomizationTests(HashRandomizationTests):
206206
# seed 42, 'abc'
207207
[-678966196, 573763426263223372, -820489388, -4282905804826039665],
208208
],
209+
'siphash13': [
210+
# NOTE: PyUCS2 layout depends on endianness
211+
# seed 0, 'abc'
212+
[69611762, -4594863902769663758, 69611762, -4594863902769663758],
213+
# seed 42, 'abc'
214+
[-975800855, 3869580338025362921, -975800855, 3869580338025362921],
215+
# seed 42, 'abcdefghijk'
216+
[-595844228, 7764564197781545852, -595844228, 7764564197781545852],
217+
# seed 0, 'äú∑ℇ'
218+
[-1093288643, -2810468059467891395, -1041341092, 4925090034378237276],
219+
# seed 42, 'äú∑ℇ'
220+
[-585999602, -2845126246016066802, -817336969, -2219421378907968137],
221+
],
209222
'siphash24': [
210223
# NOTE: PyUCS2 layout depends on endianness
211224
# seed 0, 'abc'

Lib/test/test_imp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,8 +351,8 @@ def test_issue_35321(self):
351351
self.assertEqual(_frozen_importlib.__spec__.origin, "frozen")
352352

353353
def test_source_hash(self):
354-
self.assertEqual(_imp.source_hash(42, b'hi'), b'\xc6\xe7Z\r\x03:}\xab')
355-
self.assertEqual(_imp.source_hash(43, b'hi'), b'\x85\x9765\xf8\x9a\x8b9')
354+
self.assertEqual(_imp.source_hash(42, b'hi'), b'\xfb\xd9G\x05\xaf$\x9b~')
355+
self.assertEqual(_imp.source_hash(43, b'hi'), b'\xd0/\x87C\xccC\xff\xe2')
356356

357357
def test_pyc_invalidation_mode_from_cmdline(self):
358358
cases = [

Lib/test/test_sys.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -508,16 +508,18 @@ def test_attributes(self):
508508
self.assertIsInstance(sys.hash_info.nan, int)
509509
self.assertIsInstance(sys.hash_info.imag, int)
510510
algo = sysconfig.get_config_var("Py_HASH_ALGORITHM")
511-
if sys.hash_info.algorithm in {"fnv", "siphash24"}:
511+
if sys.hash_info.algorithm in {"fnv", "siphash13", "siphash24"}:
512512
self.assertIn(sys.hash_info.hash_bits, {32, 64})
513513
self.assertIn(sys.hash_info.seed_bits, {32, 64, 128})
514514

515515
if algo == 1:
516516
self.assertEqual(sys.hash_info.algorithm, "siphash24")
517517
elif algo == 2:
518518
self.assertEqual(sys.hash_info.algorithm, "fnv")
519+
elif algo == 3:
520+
self.assertEqual(sys.hash_info.algorithm, "siphash13")
519521
else:
520-
self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash24"})
522+
self.assertIn(sys.hash_info.algorithm, {"fnv", "siphash13", "siphash24"})
521523
else:
522524
# Py_HASH_EXTERNAL
523525
self.assertEqual(algo, 0)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add SipHash13 as a string hash algorithm and use it by default.

Python/pyhash.c

Lines changed: 72 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -358,19 +358,72 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
358358
# define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
359359
#endif
360360

361-
#define HALF_ROUND(a,b,c,d,s,t) \
362-
a += b; c += d; \
361+
#define HALF_ROUND(a,b,c,d,s,t) \
362+
a += b; c += d; \
363363
b = ROTATE(b, s) ^ a; \
364364
d = ROTATE(d, t) ^ c; \
365365
a = ROTATE(a, 32);
366366

367-
#define DOUBLE_ROUND(v0,v1,v2,v3) \
368-
HALF_ROUND(v0,v1,v2,v3,13,16); \
369-
HALF_ROUND(v2,v1,v0,v3,17,21); \
370-
HALF_ROUND(v0,v1,v2,v3,13,16); \
367+
#define SINGLE_ROUND(v0,v1,v2,v3) \
368+
HALF_ROUND(v0,v1,v2,v3,13,16); \
371369
HALF_ROUND(v2,v1,v0,v3,17,21);
372370

371+
#define DOUBLE_ROUND(v0,v1,v2,v3) \
372+
SINGLE_ROUND(v0,v1,v2,v3); \
373+
SINGLE_ROUND(v0,v1,v2,v3);
374+
373375

376+
/* SipHash-1-3 keyed hash of the src_sz bytes starting at src.
 *
 * k0 and k1 are the two 64-bit key halves; they are XORed with fixed
 * constants to seed the four state words v0..v3.  The input is consumed
 * in 8-byte words with one SINGLE_ROUND per word; the final block (tail
 * bytes plus length byte) gets one more round, followed by three
 * finalization rounds.  Returns the XOR of the four state words.
 */
static uint64_t
377+
siphash13(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
378+
uint64_t b = (uint64_t)src_sz << 56; /* low byte of the length ends up in the top byte of the final block */
379+
const uint8_t *in = (const uint8_t*)src;
380+
381+
uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
382+
uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
383+
uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
384+
uint64_t v3 = k1 ^ 0x7465646279746573ULL;
385+
386+
uint64_t t;
387+
uint8_t *pt;
388+
389+
while (src_sz >= 8) { /* compression: one full 8-byte word per iteration */
390+
uint64_t mi;
391+
memcpy(&mi, in, sizeof(mi)); /* copy the next 8 input bytes into mi */
392+
mi = _le64toh(mi); /* presumably little-endian -> host order; helper is defined outside this hunk */
393+
in += sizeof(mi);
394+
src_sz -= sizeof(mi);
395+
v3 ^= mi;
396+
SINGLE_ROUND(v0,v1,v2,v3);
397+
v0 ^= mi;
398+
}
399+
400+
t = 0; /* tail buffer: the remaining 0-7 bytes, zero-padded */
401+
pt = (uint8_t *)&t;
402+
switch (src_sz) {
403+
case 7: pt[6] = in[6]; /* fall through */
404+
case 6: pt[5] = in[5]; /* fall through */
405+
case 5: pt[4] = in[4]; /* fall through */
406+
case 4: memcpy(pt, in, sizeof(uint32_t)); break; /* cases 5-7 land here too: first four bytes copied at once */
407+
case 3: pt[2] = in[2]; /* fall through */
408+
case 2: pt[1] = in[1]; /* fall through */
409+
case 1: pt[0] = in[0]; /* fall through */
410+
}
411+
b |= _le64toh(t); /* final block = length byte (top) | tail bytes (low) */
412+
413+
v3 ^= b;
414+
SINGLE_ROUND(v0,v1,v2,v3);
415+
v0 ^= b;
416+
v2 ^= 0xff; /* finalization: followed by three rounds */
417+
SINGLE_ROUND(v0,v1,v2,v3);
418+
SINGLE_ROUND(v0,v1,v2,v3);
419+
SINGLE_ROUND(v0,v1,v2,v3);
420+
421+
/* modified */
422+
t = (v0 ^ v1) ^ (v2 ^ v3);
423+
return t;
424+
}
425+
426+
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
374427
static uint64_t
375428
siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
376429
uint64_t b = (uint64_t)src_sz << 56;
@@ -419,14 +472,26 @@ siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
419472
t = (v0 ^ v1) ^ (v2 ^ v3);
420473
return t;
421474
}
475+
#endif
422476

423477
uint64_t
424478
_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
425479
{
426-
return siphash24(key, 0, src, src_sz);
480+
return siphash13(key, 0, src, src_sz);
427481
}
428482

429483

484+
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH13
485+
/* Hash function used when SipHash13 is the selected algorithm.
 *
 * Hashes the src_sz bytes at src with siphash13(), keyed by the k0/k1
 * fields of _Py_HashSecret.siphash (each passed through _le64toh), and
 * casts the 64-bit result to Py_hash_t.
 */
static Py_hash_t
486+
pysiphash(const void *src, Py_ssize_t src_sz) {
487+
return (Py_hash_t)siphash13(
488+
_le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
489+
src, src_sz);
490+
}
491+
492+
/* NOTE(review): the trailing 64 and 128 are presumably hash_bits and seed_bits -- confirm against PyHash_FuncDef */
static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash13", 64, 128};
493+
#endif
494+
430495
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
431496
static Py_hash_t
432497
pysiphash(const void *src, Py_ssize_t src_sz) {

configure

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1561,9 +1561,9 @@ Optional Packages:
15611561
--with-undefined-behavior-sanitizer
15621562
enable UndefinedBehaviorSanitizer undefined
15631563
behaviour detector, 'ubsan' (default is no)
1564-
--with-hash-algorithm=[fnv|siphash24]
1564+
--with-hash-algorithm=[fnv|siphash13|siphash24]
15651565
select hash algorithm for use in Python/pyhash.c
1566-
(default is SipHash24)
1566+
(default is SipHash13)
15671567
--with-tzpath=<list of absolute paths separated by pathsep>
15681568
Select the default time zone search path for zoneinfo.TZPATH
15691569

@@ -10431,6 +10431,10 @@ if test "${with_hash_algorithm+set}" = set; then :
1043110431
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $withval" >&5
1043210432
$as_echo "$withval" >&6; }
1043310433
case "$withval" in
10434+
siphash13)
10435+
$as_echo "#define Py_HASH_ALGORITHM 3" >>confdefs.h
10436+
10437+
;;
1043410438
siphash24)
1043510439
$as_echo "#define Py_HASH_ALGORITHM 1" >>confdefs.h
1043610440

configure.ac

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3036,16 +3036,19 @@ fi
30363036
# str, bytes and memoryview hash algorithm
30373037
AH_TEMPLATE(Py_HASH_ALGORITHM,
30383038
[Define hash algorithm for str, bytes and memoryview.
3039-
SipHash24: 1, FNV: 2, externally defined: 0])
3039+
SipHash24: 1, FNV: 2, SipHash13: 3, externally defined: 0])
30403040

30413041
AC_MSG_CHECKING(for --with-hash-algorithm)
30423042
dnl quadrigraphs "@<:@" and "@:>@" produce "[" and "]" in the output
30433043
AC_ARG_WITH(hash_algorithm,
3044-
AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash24@:>@],
3045-
[select hash algorithm for use in Python/pyhash.c (default is SipHash24)]),
3044+
AS_HELP_STRING([--with-hash-algorithm=@<:@fnv|siphash13|siphash24@:>@],
3045+
[select hash algorithm for use in Python/pyhash.c (default is SipHash13)]),
30463046
[
30473047
AC_MSG_RESULT($withval)
30483048
case "$withval" in
3049+
siphash13)
3050+
AC_DEFINE(Py_HASH_ALGORITHM, 3)
3051+
;;
30493052
siphash24)
30503053
AC_DEFINE(Py_HASH_ALGORITHM, 1)
30513054
;;

pyconfig.h.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,7 @@
14391439
#undef Py_ENABLE_SHARED
14401440

14411441
/* Define hash algorithm for str, bytes and memoryview. SipHash24: 1, FNV: 2,
1442-
externally defined: 0 */
1442+
SipHash13: 3, externally defined: 0 */
14431443
#undef Py_HASH_ALGORITHM
14441444

14451445
/* Define if you want to enable tracing references for debugging purpose */

0 commit comments

Comments
 (0)