Skip to content

Commit 995b5d3

Browse files
tiran authored and gpshead committed
bpo-38153: Normalize hashlib algorithm names (GH-16083)
Signed-off-by: Christian Heimes <[email protected]>
1 parent 375a3e2 commit 995b5d3

File tree

4 files changed

+179
-32
lines changed

4 files changed

+179
-32
lines changed

Lib/hashlib.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -70,37 +70,44 @@
7070

7171
__builtin_constructor_cache = {}
7272

73+
__block_openssl_constructor = {
74+
'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
75+
'shake_128', 'shake_256',
76+
'blake2b', 'blake2s',
77+
}
78+
7379
def __get_builtin_constructor(name):
7480
cache = __builtin_constructor_cache
7581
constructor = cache.get(name)
7682
if constructor is not None:
7783
return constructor
7884
try:
79-
if name in ('SHA1', 'sha1'):
85+
if name in {'SHA1', 'sha1'}:
8086
import _sha1
8187
cache['SHA1'] = cache['sha1'] = _sha1.sha1
82-
elif name in ('MD5', 'md5'):
88+
elif name in {'MD5', 'md5'}:
8389
import _md5
8490
cache['MD5'] = cache['md5'] = _md5.md5
85-
elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'):
91+
elif name in {'SHA256', 'sha256', 'SHA224', 'sha224'}:
8692
import _sha256
8793
cache['SHA224'] = cache['sha224'] = _sha256.sha224
8894
cache['SHA256'] = cache['sha256'] = _sha256.sha256
89-
elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'):
95+
elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}:
9096
import _sha512
9197
cache['SHA384'] = cache['sha384'] = _sha512.sha384
9298
cache['SHA512'] = cache['sha512'] = _sha512.sha512
93-
elif name in ('blake2b', 'blake2s'):
99+
elif name in {'blake2b', 'blake2s'}:
94100
import _blake2
95101
cache['blake2b'] = _blake2.blake2b
96102
cache['blake2s'] = _blake2.blake2s
97-
elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
98-
'shake_128', 'shake_256'}:
103+
elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'}:
99104
import _sha3
100105
cache['sha3_224'] = _sha3.sha3_224
101106
cache['sha3_256'] = _sha3.sha3_256
102107
cache['sha3_384'] = _sha3.sha3_384
103108
cache['sha3_512'] = _sha3.sha3_512
109+
elif name in {'shake_128', 'shake_256'}:
110+
import _sha3
104111
cache['shake_128'] = _sha3.shake_128
105112
cache['shake_256'] = _sha3.shake_256
106113
except ImportError:
@@ -114,8 +121,8 @@ def __get_builtin_constructor(name):
114121

115122

116123
def __get_openssl_constructor(name):
117-
if name in {'blake2b', 'blake2s'}:
118-
# Prefer our blake2 implementation.
124+
if name in __block_openssl_constructor:
125+
# Prefer our blake2 and sha3 implementation.
119126
return __get_builtin_constructor(name)
120127
try:
121128
f = getattr(_hashlib, 'openssl_' + name)
@@ -140,8 +147,8 @@ def __hash_new(name, data=b'', **kwargs):
140147
"""new(name, data=b'') - Return a new hashing object using the named algorithm;
141148
optionally initialized with data (which must be a bytes-like object).
142149
"""
143-
if name in {'blake2b', 'blake2s'}:
144-
# Prefer our blake2 implementation.
150+
if name in __block_openssl_constructor:
151+
# Prefer our blake2 and sha3 implementation
145152
# OpenSSL 1.1.0 comes with a limited implementation of blake2b/s.
146153
# It does neither support keyed blake2 nor advanced features like
147154
# salt, personal, tree hashing or SSE.

Lib/test/test_hashlib.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@
2626
c_hashlib = import_fresh_module('hashlib', fresh=['_hashlib'])
2727
py_hashlib = import_fresh_module('hashlib', blocked=['_hashlib'])
2828

29+
try:
30+
from _hashlib import HASH
31+
except ImportError:
32+
HASH = None
33+
2934
try:
3035
import _blake2
3136
except ImportError:
@@ -386,6 +391,9 @@ def check_sha3(self, name, capacity, rate, suffix):
386391
constructors = self.constructors_to_test[name]
387392
for hash_object_constructor in constructors:
388393
m = hash_object_constructor()
394+
if HASH is not None and isinstance(m, HASH):
395+
# _hashopenssl's variant does not have extra SHA3 attributes
396+
continue
389397
self.assertEqual(capacity + rate, 1600)
390398
self.assertEqual(m._capacity_bits, capacity)
391399
self.assertEqual(m._rate_bits, rate)
@@ -985,6 +993,10 @@ def test_scrypt(self):
985993
hashlib.scrypt(b'password', salt=b'salt', n=2, r=8, p=1,
986994
dklen=dklen)
987995

996+
def test_normalized_name(self):
997+
self.assertNotIn("blake2b512", hashlib.algorithms_available)
998+
self.assertNotIn("sha3-512", hashlib.algorithms_available)
999+
9881000

9891001
if __name__ == "__main__":
9901002
unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Names of hashing algorithms from OpenSSL are now normalized to follow
2+
Python's naming conventions. For example OpenSSL uses sha3-512 instead of
3+
sha3_512 or blake2b512 instead of blake2b.

Modules/_hashopenssl.c

Lines changed: 146 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@
3434

3535
#define MUNCH_SIZE INT_MAX
3636

37+
#if defined(NID_sha3_224) && defined(EVP_MD_FLAG_XOF)
38+
#define PY_OPENSSL_HAS_SHA3 1
39+
#endif
40+
41+
#ifdef NID_blake2b512
42+
#define PY_OPENSSL_HAS_BLAKE2 1
43+
#endif
44+
3745
typedef struct {
3846
PyObject_HEAD
3947
EVP_MD_CTX *ctx; /* OpenSSL message digest context */
@@ -82,6 +90,135 @@ _setException(PyObject *exc)
8290
}
8391
/* LCOV_EXCL_STOP */
8492

93+
static PyObject*
94+
py_digest_name(const EVP_MD *md)
95+
{
96+
int nid = EVP_MD_nid(md);
97+
const char *name = NULL;
98+
99+
/* Hard-coded names for well-known hashing algorithms.
100+
* OpenSSL uses slightly different names for algorithms like SHA3.
101+
*/
102+
switch (nid) {
103+
case NID_md5:
104+
name = "md5";
105+
break;
106+
case NID_sha1:
107+
name = "sha1";
108+
break;
109+
case NID_sha224:
110+
name ="sha224";
111+
break;
112+
case NID_sha256:
113+
name ="sha256";
114+
break;
115+
case NID_sha384:
116+
name ="sha384";
117+
break;
118+
case NID_sha512:
119+
name ="sha512";
120+
break;
121+
#ifdef NID_sha512_224
122+
case NID_sha512_224:
123+
name ="sha512_224";
124+
break;
125+
case NID_sha512_256:
126+
name ="sha512_256";
127+
break;
128+
#endif
129+
#ifdef PY_OPENSSL_HAS_SHA3
130+
case NID_sha3_224:
131+
name ="sha3_224";
132+
break;
133+
case NID_sha3_256:
134+
name ="sha3_256";
135+
break;
136+
case NID_sha3_384:
137+
name ="sha3_384";
138+
break;
139+
case NID_sha3_512:
140+
name ="sha3_512";
141+
break;
142+
case NID_shake128:
143+
name ="shake_128";
144+
break;
145+
case NID_shake256:
146+
name ="shake_256";
147+
break;
148+
#endif
149+
#ifdef PY_OPENSSL_HAS_BLAKE2
150+
case NID_blake2s256:
151+
name ="blake2s";
152+
break;
153+
case NID_blake2b512:
154+
name ="blake2b";
155+
break;
156+
#endif
157+
default:
158+
/* Ignore aliased names and only use long, lowercase name. The aliases
159+
* pollute the list and OpenSSL appears to have its own definition of
160+
* alias as the resulting list still contains duplicate and alternate
161+
* names for several algorithms.
162+
*/
163+
name = OBJ_nid2ln(nid);
164+
if (name == NULL)
165+
name = OBJ_nid2sn(nid);
166+
break;
167+
}
168+
169+
return PyUnicode_FromString(name);
170+
}
171+
172+
static const EVP_MD*
173+
py_digest_by_name(const char *name)
174+
{
175+
const EVP_MD *digest = EVP_get_digestbyname(name);
176+
177+
/* OpenSSL uses dash instead of underscore in names of some algorithms
178+
* like SHA3 and SHAKE. Detect different spellings. */
179+
if (digest == NULL) {
180+
#ifdef NID_sha512_224
181+
if (!strcmp(name, "sha512_224") || !strcmp(name, "SHA512_224")) {
182+
digest = EVP_sha512_224();
183+
}
184+
else if (!strcmp(name, "sha512_256") || !strcmp(name, "SHA512_256")) {
185+
digest = EVP_sha512_256();
186+
}
187+
#endif
188+
#ifdef PY_OPENSSL_HAS_SHA3
189+
/* could be sha3_ or shake_, Python never defined upper case */
190+
else if (!strcmp(name, "sha3_224")) {
191+
digest = EVP_sha3_224();
192+
}
193+
else if (!strcmp(name, "sha3_256")) {
194+
digest = EVP_sha3_256();
195+
}
196+
else if (!strcmp(name, "sha3_384")) {
197+
digest = EVP_sha3_384();
198+
}
199+
else if (!strcmp(name, "sha3_512")) {
200+
digest = EVP_sha3_512();
201+
}
202+
else if (!strcmp(name, "shake_128")) {
203+
digest = EVP_shake128();
204+
}
205+
else if (!strcmp(name, "shake_256")) {
206+
digest = EVP_shake256();
207+
}
208+
#endif
209+
#ifdef PY_OPENSSL_HAS_BLAKE2
210+
else if (!strcmp(name, "blake2s256")) {
211+
digest = EVP_blake2s256();
212+
}
213+
else if (!strcmp(name, "blake2b512")) {
214+
digest = EVP_blake2b512();
215+
}
216+
#endif
217+
}
218+
219+
return digest;
220+
}
221+
85222
static EVPobject *
86223
newEVPobject(void)
87224
{
@@ -304,16 +441,7 @@ EVP_get_digest_size(EVPobject *self, void *closure)
304441
static PyObject *
305442
EVP_get_name(EVPobject *self, void *closure)
306443
{
307-
const char *name = EVP_MD_name(EVP_MD_CTX_md(self->ctx));
308-
PyObject *name_obj, *name_lower;
309-
310-
name_obj = PyUnicode_FromString(name);
311-
if (!name_obj) {
312-
return NULL;
313-
}
314-
name_lower = PyObject_CallMethod(name_obj, "lower", NULL);
315-
Py_DECREF(name_obj);
316-
return name_lower;
444+
return py_digest_name(EVP_MD_CTX_md(self->ctx));
317445
}
318446

319447
static PyGetSetDef EVP_getseters[] = {
@@ -337,7 +465,7 @@ static PyObject *
337465
EVP_repr(EVPobject *self)
338466
{
339467
PyObject *name_obj, *repr;
340-
name_obj = EVP_get_name(self, NULL);
468+
name_obj = py_digest_name(EVP_MD_CTX_md(self->ctx));
341469
if (!name_obj) {
342470
return NULL;
343471
}
@@ -403,6 +531,7 @@ static PyTypeObject EVPtype = {
403531
0, /* tp_dictoffset */
404532
};
405533

534+
\
406535
static PyObject *
407536
EVPnew(const EVP_MD *digest,
408537
const unsigned char *cp, Py_ssize_t len, int usedforsecurity)
@@ -485,7 +614,7 @@ EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj,
485614
if (data_obj)
486615
GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view);
487616

488-
digest = EVP_get_digestbyname(name);
617+
digest = py_digest_by_name(name);
489618

490619
ret_obj = EVPnew(digest,
491620
(unsigned char*)view.buf, view.len,
@@ -922,21 +1051,17 @@ typedef struct _internal_name_mapper_state {
9221051

9231052
/* A callback function to pass to OpenSSL's OBJ_NAME_do_all(...) */
9241053
static void
925-
_openssl_hash_name_mapper(const OBJ_NAME *openssl_obj_name, void *arg)
1054+
_openssl_hash_name_mapper(const EVP_MD *md, const char *from,
1055+
const char *to, void *arg)
9261056
{
9271057
_InternalNameMapperState *state = (_InternalNameMapperState *)arg;
9281058
PyObject *py_name;
9291059

9301060
assert(state != NULL);
931-
if (openssl_obj_name == NULL)
932-
return;
933-
/* Ignore aliased names, they pollute the list and OpenSSL appears to
934-
* have its own definition of alias as the resulting list still
935-
* contains duplicate and alternate names for several algorithms. */
936-
if (openssl_obj_name->alias)
1061+
if (md == NULL)
9371062
return;
9381063

939-
py_name = PyUnicode_FromString(openssl_obj_name->name);
1064+
py_name = py_digest_name(md);
9401065
if (py_name == NULL) {
9411066
state->error = 1;
9421067
} else {
@@ -958,7 +1083,7 @@ generate_hash_name_list(void)
9581083
return NULL;
9591084
state.error = 0;
9601085

961-
OBJ_NAME_do_all(OBJ_NAME_TYPE_MD_METH, &_openssl_hash_name_mapper, &state);
1086+
EVP_MD_do_all(&_openssl_hash_name_mapper, &state);
9621087

9631088
if (state.error) {
9641089
Py_DECREF(state.set);

0 commit comments

Comments
 (0)