Skip to content

Commit d8e69b2

Browse files
authored
gh-122854: Add Py_HashBuffer() function (#122855)
1 parent 3d60dfb commit d8e69b2

File tree

17 files changed

+80
-25
lines changed

17 files changed

+80
-25
lines changed

Doc/c-api/hash.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,25 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`.
8989
9090
.. versionadded:: 3.13
9191
92+
93+
.. c:function:: Py_hash_t Py_HashBuffer(const void *ptr, Py_ssize_t len)
94+
95+
Compute and return the hash value of a buffer of *len* bytes
96+
starting at address *ptr*. The hash is guaranteed to match that of
97+
:class:`bytes`, :class:`memoryview`, and other built-in objects
98+
that implement the :ref:`buffer protocol <bufferobjects>`.
99+
100+
Use this function to implement hashing for immutable objects whose
101+
:c:member:`~PyTypeObject.tp_richcompare` function compares the object's
102+
data to another object's buffer.
103+
104+
*len* must be greater than or equal to ``0``.
105+
106+
This function always succeeds.
107+
108+
.. versionadded:: 3.14
109+
110+
92111
.. c:function:: Py_hash_t PyObject_GenericHash(PyObject *obj)
93112
94113
Generic hashing function that is meant to be put into a type

Doc/whatsnew/3.14.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,9 @@ New Features
489489
similar to ``sep.join(iterable)`` in Python.
490490
(Contributed by Victor Stinner in :gh:`121645`.)
491491

492+
* Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
493+
(Contributed by Antoine Pitrou and Victor Stinner in :gh:`122854`.)
494+
492495

493496
Porting to Python 3.14
494497
----------------------

Include/cpython/pyhash.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,5 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
4545

4646
PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr);
4747
PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *);
48+
49+
PyAPI_FUNC(Py_hash_t) Py_HashBuffer(const void *ptr, Py_ssize_t len);

Include/internal/pycore_pyhash.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,6 @@ _Py_HashPointerRaw(const void *ptr)
2020
return (Py_hash_t)x;
2121
}
2222

23-
// Export for '_datetime' shared extension
24-
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
25-
2623
/* Hash secret
2724
*
2825
* memory layout on 64 bit systems

Lib/test/test_capi/test_hash.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,16 @@ def python_hash_pointer(x):
7878
VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1)
7979
self.assertEqual(hash_pointer(VOID_P_MAX), -2)
8080

81+
def test_hash_buffer(self):
82+
hash_buffer = _testcapi.hash_buffer
83+
84+
def check(data):
85+
self.assertEqual(hash_buffer(data), hash(data))
86+
87+
check(b'')
88+
check(b'abc')
89+
check(b'x' * 1024)
90+
8191

8292
if __name__ == "__main__":
8393
unittest.main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
2+
Patch by Antoine Pitrou and Victor Stinner.

Modules/_datetimemodule.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3842,7 +3842,7 @@ datetime_date_replace_impl(PyDateTime_Date *self, int year, int month,
38423842
static Py_hash_t
38433843
generic_hash(unsigned char *data, int len)
38443844
{
3845-
return _Py_HashBytes(data, len);
3845+
return Py_HashBuffer(data, len);
38463846
}
38473847

38483848

Modules/_hashopenssl.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <stdbool.h>
2626
#include "Python.h"
2727
#include "pycore_hashtable.h"
28-
#include "pycore_pyhash.h" // _Py_HashBytes()
2928
#include "pycore_strhex.h" // _Py_strhex()
3029
#include "hashlib.h"
3130

@@ -186,7 +185,7 @@ static const py_hashentry_t py_hashes[] = {
186185

187186
static Py_uhash_t
188187
py_hashentry_t_hash_name(const void *key) {
189-
return _Py_HashBytes(key, strlen((const char *)key));
188+
return Py_HashBuffer(key, strlen((const char *)key));
190189
}
191190

192191
static int

Modules/_sre/sre.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2944,7 +2944,7 @@ pattern_hash(PatternObject *self)
29442944
return -1;
29452945
}
29462946

2947-
hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2947+
hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
29482948
hash ^= hash2;
29492949

29502950
hash ^= self->flags;

Modules/_testcapi/hash.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,14 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
4545
}
4646

4747

48+
static PyObject *
49+
long_from_hash(Py_hash_t hash)
50+
{
51+
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
52+
return PyLong_FromLongLong(hash);
53+
}
54+
55+
4856
static PyObject *
4957
hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
5058
{
@@ -54,8 +62,21 @@ hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
5462
}
5563

5664
Py_hash_t hash = Py_HashPointer(ptr);
57-
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
58-
return PyLong_FromLongLong(hash);
65+
return long_from_hash(hash);
66+
}
67+
68+
69+
static PyObject *
70+
hash_buffer(PyObject *Py_UNUSED(module), PyObject *args)
71+
{
72+
char *ptr;
73+
Py_ssize_t len;
74+
if (!PyArg_ParseTuple(args, "y#", &ptr, &len)) {
75+
return NULL;
76+
}
77+
78+
Py_hash_t hash = Py_HashBuffer(ptr, len);
79+
return long_from_hash(hash);
5980
}
6081

6182

@@ -64,14 +85,14 @@ object_generichash(PyObject *Py_UNUSED(module), PyObject *arg)
6485
{
6586
NULLABLE(arg);
6687
Py_hash_t hash = PyObject_GenericHash(arg);
67-
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
68-
return PyLong_FromLongLong(hash);
88+
return long_from_hash(hash);
6989
}
7090

7191

7292
static PyMethodDef test_methods[] = {
7393
{"hash_getfuncdef", hash_getfuncdef, METH_NOARGS},
7494
{"hash_pointer", hash_pointer, METH_O},
95+
{"hash_buffer", hash_buffer, METH_VARARGS},
7596
{"object_generichash", object_generichash, METH_O},
7697
{NULL},
7798
};

Modules/_xxtestfuzz/fuzzer.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#endif
1616

1717
#include <Python.h>
18-
#include "pycore_pyhash.h" // _Py_HashBytes()
1918
#include <stdlib.h>
2019
#include <inttypes.h>
2120

@@ -45,7 +44,7 @@ static int fuzz_builtin_int(const char* data, size_t size) {
4544
/* Pick a random valid base. (When the fuzzed function takes extra
4645
parameters, it's somewhat normal to hash the input to generate those
4746
parameters. We want to exercise all code paths, so we do so here.) */
48-
int base = _Py_HashBytes(data, size) % 37;
47+
int base = Py_HashBuffer(data, size) % 37;
4948
if (base == 1) {
5049
// 1 is the only number between 0 and 36 that is not a valid base.
5150
base = 0;

Objects/bytesobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1598,7 +1598,7 @@ _Py_COMP_DIAG_PUSH
15981598
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
15991599
if (a->ob_shash == -1) {
16001600
/* Can't fail */
1601-
a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1601+
a->ob_shash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
16021602
}
16031603
return a->ob_shash;
16041604
_Py_COMP_DIAG_POP

Objects/codeobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2561,12 +2561,12 @@ hash_const(const void *key)
25612561
if (PySlice_Check(op)) {
25622562
PySliceObject *s = (PySliceObject *)op;
25632563
PyObject *data[3] = { s->start, s->stop, s->step };
2564-
return _Py_HashBytes(&data, sizeof(data));
2564+
return Py_HashBuffer(&data, sizeof(data));
25652565
}
25662566
else if (PyTuple_CheckExact(op)) {
25672567
Py_ssize_t size = PyTuple_GET_SIZE(op);
25682568
PyObject **data = _PyTuple_ITEMS(op);
2569-
return _Py_HashBytes(data, sizeof(PyObject *) * size);
2569+
return Py_HashBuffer(data, sizeof(PyObject *) * size);
25702570
}
25712571
Py_hash_t h = PyObject_Hash(op);
25722572
if (h == -1) {

Objects/memoryobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3087,7 +3087,7 @@ memory_hash(PyObject *_self)
30873087
}
30883088

30893089
/* Can't fail */
3090-
self->hash = _Py_HashBytes(mem, view->len);
3090+
self->hash = Py_HashBuffer(mem, view->len);
30913091

30923092
if (mem != view->buf)
30933093
PyMem_Free(mem);

Objects/unicodeobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11688,7 +11688,7 @@ unicode_hash(PyObject *self)
1168811688
if (hash != -1) {
1168911689
return hash;
1169011690
}
11691-
x = _Py_HashBytes(PyUnicode_DATA(self),
11691+
x = Py_HashBuffer(PyUnicode_DATA(self),
1169211692
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
1169311693

1169411694
FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);

Python/import.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1174,7 +1174,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep)
11741174
static Py_uhash_t
11751175
hashtable_hash_str(const void *key)
11761176
{
1177-
return _Py_HashBytes(key, strlen((const char *)key));
1177+
return Py_HashBuffer(key, strlen((const char *)key));
11781178
}
11791179

11801180
static int

Python/pyhash.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ extern PyHash_FuncDef PyHash_Func;
2222
static PyHash_FuncDef PyHash_Func;
2323
#endif
2424

25-
/* Count _Py_HashBytes() calls */
25+
/* Count Py_HashBuffer() calls */
2626
#ifdef Py_HASH_STATS
2727
#define Py_HASH_STATS_MAX 32
2828
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
@@ -146,9 +146,8 @@ PyObject_GenericHash(PyObject *obj)
146146
}
147147

148148
Py_hash_t
149-
_Py_HashBytes(const void *src, Py_ssize_t len)
149+
Py_HashBuffer(const void *ptr, Py_ssize_t len)
150150
{
151-
Py_hash_t x;
152151
/*
153152
We make the hash of the empty string be 0, rather than using
154153
(prefix ^ suffix), since this slightly obfuscates the hash secret
@@ -161,11 +160,12 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
161160
hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
162161
#endif
163162

163+
Py_hash_t x;
164164
#if Py_HASH_CUTOFF > 0
165165
if (len < Py_HASH_CUTOFF) {
166166
/* Optimize hashing of very small strings with inline DJBX33A. */
167167
Py_uhash_t hash;
168-
const unsigned char *p = src;
168+
const unsigned char *p = ptr;
169169
hash = 5381; /* DJBX33A starts with 5381 */
170170

171171
switch(len) {
@@ -186,10 +186,13 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
186186
}
187187
else
188188
#endif /* Py_HASH_CUTOFF */
189-
x = PyHash_Func.hash(src, len);
189+
{
190+
x = PyHash_Func.hash(ptr, len);
191+
}
190192

191-
if (x == -1)
193+
if (x == -1) {
192194
return -2;
195+
}
193196
return x;
194197
}
195198

0 commit comments

Comments
 (0)