Skip to content

Commit 77246cc

Browse files
committed
Add hash map
1 parent e8b5483 commit 77246cc

File tree

2 files changed

+87
-46
lines changed

2 files changed

+87
-46
lines changed

data_structures/hashing/hash_map.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
"""
2+
Hash map with open addressing.
3+
4+
https://en.wikipedia.org/wiki/Hash_table
5+
6+
Another hash map implementation, with a good explanation.
7+
Modern Dictionaries by Raymond Hettinger
8+
https://www.youtube.com/watch?v=p33CVV29OG8
9+
"""
110
from collections.abc import Iterator, MutableMapping
211
from dataclasses import dataclass
312
from typing import Generic, TypeVar
@@ -24,20 +33,37 @@ def __bool__(self):
2433

2534

2635
class HashMap(MutableMapping, Generic[KEY, VAL]):
36+
"""
37+
Hash map with open addressing.
38+
"""
39+
2740
def __init__(self, initial_block_size: int = 8, capacity_factor: float = 0.75):
2841
self._initial_block_size = initial_block_size
2942
self._buckets: list[_Item | None] = [None] * initial_block_size
3043
assert 0.0 < capacity_factor < 1.0
3144
self._capacity_factor = capacity_factor
3245
self._len = 0
3346

34-
def _get_index(self, key: KEY) -> int:
47+
def _get_bucket_index(self, key: KEY) -> int:
3548
return hash(key) % len(self._buckets)
3649

3750
def _get_next_ind(self, ind: int) -> int:
51+
"""
52+
Get next index.
53+
54+
Implements linear open addressing.
55+
"""
3856
return (ind + 1) % len(self._buckets)
3957

4058
def _try_set(self, ind: int, key: KEY, val: VAL) -> bool:
59+
"""
60+
Try to add value to the bucket.
61+
62+
If the bucket is empty or holds the same key, inserts the value and returns True.
63+
64+
If bucket has another key or deleted placeholder,
65+
that means that we need to check next bucket.
66+
"""
4167
stored = self._buckets[ind]
4268
if not stored:
4369
self._buckets[ind] = _Item(key, val)
@@ -50,10 +76,16 @@ def _try_set(self, ind: int, key: KEY, val: VAL) -> bool:
5076
return False
5177

5278
def _is_full(self):
79+
"""
80+
Return true if we have reached safe capacity.
81+
82+
So we need to increase the number of buckets to avoid collisions.
83+
"""
5384
limit = len(self._buckets) * self._capacity_factor
5485
return len(self) >= int(limit)
5586

5687
def _is_sparse(self):
88+
"""Return true if we need twice fewer buckets when we have now."""
5789
if len(self._buckets) <= self._initial_block_size:
5890
return False
5991
limit = len(self._buckets) * self._capacity_factor / 2
@@ -73,8 +105,8 @@ def _size_up(self):
73105
def _size_down(self):
74106
self._resize(len(self._buckets) // 2)
75107

76-
def _iterate_buckets(self, key: KEY):
77-
ind = self._get_index(key)
108+
def _iterate_buckets(self, key: KEY) -> Iterator[int]:
109+
ind = self._get_bucket_index(key)
78110
for _ in range(len(self._buckets)):
79111
yield ind
80112
ind = self._get_next_ind(ind)
@@ -85,9 +117,6 @@ def _add_item(self, key: KEY, val: VAL):
85117
break
86118

87119
def __setitem__(self, key: KEY, val: VAL) -> None:
88-
if self._is_sparse():
89-
self._size_down()
90-
91120
if self._is_full():
92121
self._size_up()
93122

@@ -104,6 +133,8 @@ def __delitem__(self, key: KEY) -> None:
104133
self._buckets[ind] = _deleted
105134
self._len -= 1
106135
break
136+
if self._is_sparse():
137+
self._size_down()
107138

108139
def __getitem__(self, key: KEY) -> VAL:
109140
for ind in self._iterate_buckets(key):

data_structures/hashing/test_hash_map.py

Lines changed: 50 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,66 +5,73 @@
55
from data_structures.hashing.hash_map import HashMap
66

77

8-
def get(k):
8+
def _get(k):
99
return getitem, k
1010

1111

12-
def set_(k, v):
12+
def _set(k, v):
1313
return setitem, k, v
1414

1515

16-
def delete(k):
16+
def _del(k):
1717
return delitem, k
1818

1919

20-
add_items = (
21-
set_("key_a", "val_a"),
22-
set_("key_b", "val_b"),
20+
def _run_operation(obj, fun, *args):
21+
try:
22+
return fun(obj, *args), None
23+
except Exception as e:
24+
return None, e
25+
26+
27+
_add_items = (
28+
_set("key_a", "val_a"),
29+
_set("key_b", "val_b"),
2330
)
2431

25-
overwrite_items = [
26-
set_("key_a", "val_a"),
27-
set_("key_a", "val_b"),
32+
_overwrite_items = [
33+
_set("key_a", "val_a"),
34+
_set("key_a", "val_b"),
2835
]
2936

30-
delete_items = [
31-
set_("key_a", "val_a"),
32-
set_("key_b", "val_b"),
33-
delete("key_a"),
34-
delete("key_b"),
35-
set_("key_a", "val_a"),
36-
delete("key_a"),
37+
_delete_items = [
38+
_set("key_a", "val_a"),
39+
_set("key_b", "val_b"),
40+
_del("key_a"),
41+
_del("key_b"),
42+
_set("key_a", "val_a"),
43+
_del("key_a"),
3744
]
3845

39-
access_not_existing_items = [
40-
get("key_a"),
41-
delete("key_a"),
42-
set_("key_a", "val_a"),
43-
delete("key_a"),
44-
delete("key_a"),
45-
get("key_a"),
46+
_access_absent_items = [
47+
_get("key_a"),
48+
_del("key_a"),
49+
_set("key_a", "val_a"),
50+
_del("key_a"),
51+
_del("key_a"),
52+
_get("key_a"),
4653
]
4754

48-
with_resize_up = [
49-
*[set_(x, x) for x in range(5)], # guaranteed upsize
55+
_add_with_resize_up = [
56+
*[_set(x, x) for x in range(5)], # guaranteed upsize
5057
]
5158

52-
with_resize_down = [
53-
*[set_(x, x) for x in range(5)], # guaranteed upsize
54-
*[delete(x) for x in range(5)],
55-
set_("key_a", "val_b"),
59+
_add_with_resize_down = [
60+
*[_set(x, x) for x in range(5)], # guaranteed upsize
61+
*[_del(x) for x in range(5)],
62+
_set("key_a", "val_b"),
5663
]
5764

5865

5966
@pytest.mark.parametrize(
6067
"operations",
6168
(
62-
pytest.param(add_items, id="add items"),
63-
pytest.param(overwrite_items, id="overwrite items"),
64-
pytest.param(delete_items, id="delete items"),
65-
pytest.param(access_not_existing_items, id="access none existing items"),
66-
pytest.param(with_resize_up, id="with resize up"),
67-
pytest.param(with_resize_down, id="with resize down"),
69+
pytest.param(_add_items, id="add items"),
70+
pytest.param(_overwrite_items, id="overwrite items"),
71+
pytest.param(_delete_items, id="delete items"),
72+
pytest.param(_access_absent_items, id="access absent items"),
73+
pytest.param(_add_with_resize_up, id="add with resize up"),
74+
pytest.param(_add_with_resize_down, id="add with resize down"),
6875
),
6976
)
7077
def test_hash_map_is_the_same_as_dict(operations):
@@ -80,8 +87,11 @@ def test_hash_map_is_the_same_as_dict(operations):
8087
assert set(my.items()) == set(py.items())
8188

8289

83-
def _run_operation(obj, fun, *args):
84-
try:
85-
return fun(obj, *args), None
86-
except Exception as e:
87-
return None, e
90+
def test_no_new_methods_was_added_to_api():
91+
def is_public(name: str) -> bool:
92+
return not name.startswith("_")
93+
94+
dict_public_names = {name for name in dir({}) if is_public(name)}
95+
hash_public_names = {name for name in dir(HashMap()) if is_public(name)}
96+
97+
assert dict_public_names > hash_public_names

0 commit comments

Comments
 (0)