Skip to content

Commit 468ab13

Browse files
jschendel authored and jreback committed
Modify hash(pd.NA) to avoid integer hash collisions (#30150)
1 parent fa4949f commit 468ab13

File tree

2 files changed: 24 additions and 1 deletion

2 files changed: 24 additions and 1 deletion

pandas/_libs/missing.pyx

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,17 @@ from pandas._libs.tslibs.np_datetime cimport (
1515
from pandas._libs.tslibs.nattype cimport (
1616
checknull_with_nat, c_NaT as NaT, is_null_datetimelike)
1717

18+
from pandas.compat import is_platform_32bit
19+
1820

1921
cdef:
2022
float64_t INF = <float64_t>np.inf
2123
float64_t NEGINF = -INF
2224

2325
int64_t NPY_NAT = util.get_nat()
2426

27+
bint is_32bit = is_platform_32bit()
28+
2529

2630
cpdef bint checknull(object val):
2731
"""
@@ -345,7 +349,9 @@ class NAType(C_NAType):
345349
raise TypeError("boolean value of NA is ambiguous")
346350

347351
def __hash__(self):
348-
return id(self)
352+
# GH 30013: Ensure hash is large enough to avoid hash collisions with integers
353+
exponent = 31 if is_32bit else 61
354+
return 2 ** exponent - 1
349355

350356
# Binary arithmetic and comparison ops -> propagate
351357

pandas/tests/scalar/test_na_scalar.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -175,3 +175,20 @@ def test_series_isna():
175175
s = pd.Series([1, NA], dtype=object)
176176
expected = pd.Series([False, True])
177177
tm.assert_series_equal(s.isna(), expected)
178+
179+
180+
def test_integer_hash_collision_dict():
181+
# GH 30013
182+
result = {NA: "foo", hash(NA): "bar"}
183+
184+
assert result[NA] == "foo"
185+
assert result[hash(NA)] == "bar"
186+
187+
188+
def test_integer_hash_collision_set():
189+
# GH 30013
190+
result = {NA, hash(NA)}
191+
192+
assert len(result) == 2
193+
assert NA in result
194+
assert hash(NA) in result

0 commit comments

Comments
 (0)