2 files changed: +10 -2 lines changed
Original file line number | Diff line number | Diff line change
16
16
from pandas .util .decorators import (Appender , cache_readonly ,
17
17
deprecate_kwarg , Substitution )
18
18
from pandas .core .common import AbstractMethodError
19
- from pandas .tools .hashing import hash_pandas_object
20
19
from pandas .formats .printing import pprint_thing
21
20
22
21
_shared_docs = dict ()
@@ -838,6 +837,7 @@ def hash(self, index=True):
838
837
9751253963311919054], dtype=uint64)
839
838
840
839
"""
840
+ from pandas .tools .hashing import hash_pandas_object
841
841
return hash_pandas_object (self , index = index )
842
842
843
843
Original file line number | Diff line number | Diff line change
2
2
data hash pandas / numpy objects
3
3
"""
4
4
5
+ from hashlib import md5
5
6
import numpy as np
7
+ from pandas import Series
6
8
from pandas .types .generic import ABCIndexClass , ABCSeries , ABCDataFrame
7
9
from pandas .types .common import is_categorical_dtype
8
10
@@ -71,7 +73,13 @@ def hash_array(vals):
71
73
72
74
vals = vals .view ('u{}' .format (vals .dtype .itemsize )).astype ('u8' )
73
75
else :
74
- vals = np .array ([hash (x ) for x in vals ], dtype = np .uint64 )
76
+
77
+ # we want to stringify
78
+ # then apply a consistent hashing scheme
79
+ def f (v ):
80
+ return int (md5 (v ).hexdigest (), 16 ) % (10 ** 8 )
81
+ vals = Series (vals ).astype (str ).str .encode ('utf8' ).values
82
+ vals = np .array ([f (v ) for v in vals ], dtype = 'uint64' )
75
83
76
84
# Then, redistribute these 64-bit ints within the space of 64-bit ints
77
85
vals ^= vals >> 30
You can’t perform that action at this time.
0 commit comments