Skip to content

Commit e8b5483

Browse files
committed
Update tests
1 parent 7f1a552 commit e8b5483

File tree

2 files changed

+216
-0
lines changed

2 files changed

+216
-0
lines changed

data_structures/hashing/hash_map.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
from collections.abc import Iterator, MutableMapping
2+
from dataclasses import dataclass
3+
from typing import Generic, TypeVar
4+
5+
KEY = TypeVar("KEY")
6+
VAL = TypeVar("VAL")
7+
8+
9+
@dataclass(frozen=True, slots=True)
10+
class _Item(Generic[KEY, VAL]):
11+
key: KEY
12+
val: VAL
13+
14+
15+
class _DeletedItem(_Item):
16+
def __init__(self):
17+
super().__init__(None, None)
18+
19+
def __bool__(self):
20+
return False
21+
22+
23+
_deleted = _DeletedItem()
24+
25+
26+
class HashMap(MutableMapping, Generic[KEY, VAL]):
27+
def __init__(self, initial_block_size: int = 8, capacity_factor: float = 0.75):
28+
self._initial_block_size = initial_block_size
29+
self._buckets: list[_Item | None] = [None] * initial_block_size
30+
assert 0.0 < capacity_factor < 1.0
31+
self._capacity_factor = capacity_factor
32+
self._len = 0
33+
34+
def _get_index(self, key: KEY) -> int:
35+
return hash(key) % len(self._buckets)
36+
37+
def _get_next_ind(self, ind: int) -> int:
38+
return (ind + 1) % len(self._buckets)
39+
40+
def _try_set(self, ind: int, key: KEY, val: VAL) -> bool:
41+
stored = self._buckets[ind]
42+
if not stored:
43+
self._buckets[ind] = _Item(key, val)
44+
self._len += 1
45+
return True
46+
elif stored.key == key:
47+
self._buckets[ind] = _Item(key, val)
48+
return True
49+
else:
50+
return False
51+
52+
def _is_full(self):
53+
limit = len(self._buckets) * self._capacity_factor
54+
return len(self) >= int(limit)
55+
56+
def _is_sparse(self):
57+
if len(self._buckets) <= self._initial_block_size:
58+
return False
59+
limit = len(self._buckets) * self._capacity_factor / 2
60+
return len(self) < limit
61+
62+
def _resize(self, new_size: int):
63+
old_buckets = self._buckets
64+
self._buckets = [None] * new_size
65+
self._len = 0
66+
for item in old_buckets:
67+
if item:
68+
self._add_item(item.key, item.val)
69+
70+
def _size_up(self):
71+
self._resize(len(self._buckets) * 2)
72+
73+
def _size_down(self):
74+
self._resize(len(self._buckets) // 2)
75+
76+
def _iterate_buckets(self, key: KEY):
77+
ind = self._get_index(key)
78+
for _ in range(len(self._buckets)):
79+
yield ind
80+
ind = self._get_next_ind(ind)
81+
82+
def _add_item(self, key: KEY, val: VAL):
83+
for ind in self._iterate_buckets(key):
84+
if self._try_set(ind, key, val):
85+
break
86+
87+
def __setitem__(self, key: KEY, val: VAL) -> None:
88+
if self._is_sparse():
89+
self._size_down()
90+
91+
if self._is_full():
92+
self._size_up()
93+
94+
self._add_item(key, val)
95+
96+
def __delitem__(self, key: KEY) -> None:
97+
for ind in self._iterate_buckets(key):
98+
item = self._buckets[ind]
99+
if item is None:
100+
raise KeyError(key)
101+
if item is _deleted:
102+
continue
103+
if item.key == key:
104+
self._buckets[ind] = _deleted
105+
self._len -= 1
106+
break
107+
108+
def __getitem__(self, key: KEY) -> VAL:
109+
for ind in self._iterate_buckets(key):
110+
item = self._buckets[ind]
111+
if item is None:
112+
break
113+
if item is _deleted:
114+
continue
115+
if item.key == key:
116+
return item.val
117+
raise KeyError(key)
118+
119+
def __len__(self) -> int:
120+
return self._len
121+
122+
def __iter__(self) -> Iterator[KEY]:
123+
yield from (item.key for item in self._buckets if item)
124+
125+
def __repr__(self) -> str:
126+
val_string = " ,".join(
127+
f"{item.key}: {item.val}" for item in self._buckets if item
128+
)
129+
return f"HashMap({val_string})"
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
from operator import delitem, getitem, setitem
2+
3+
import pytest
4+
5+
from data_structures.hashing.hash_map import HashMap
6+
7+
8+
def get(k):
    """Describe a lookup of key *k* as an ``(operator, key)`` tuple."""
    operation = (getitem, k)
    return operation
10+
11+
12+
def set_(k, v):
    """Describe storing *v* under key *k* as an ``(operator, key, value)`` tuple."""
    operation = (setitem, k, v)
    return operation
14+
15+
16+
def delete(k):
    """Describe removal of key *k* as an ``(operator, key)`` tuple."""
    operation = (delitem, k)
    return operation
18+
19+
20+
# Operation sequences replayed against both HashMap and dict by the test below.
# Each entry is a tuple of an operator function followed by its arguments.

# Plain insertion of two distinct keys.
add_items = (
    set_("key_a", "val_a"),
    set_("key_b", "val_b"),
)

# Second assignment replaces the value of an existing key.
overwrite_items = [
    set_("key_a", "val_a"),
    set_("key_a", "val_b"),
]

# Insert, remove, then re-insert and remove a key again.
delete_items = [
    set_("key_a", "val_a"),
    set_("key_b", "val_b"),
    delete("key_a"),
    delete("key_b"),
    set_("key_a", "val_a"),
    delete("key_a"),
]

# Lookups and deletions of keys that are absent at that point.
access_not_existing_items = [
    get("key_a"),
    delete("key_a"),
    set_("key_a", "val_a"),
    delete("key_a"),
    delete("key_a"),
    get("key_a"),
]

# Five inserts: guaranteed upsize.
with_resize_up = [set_(x, x) for x in range(5)]

# Guaranteed upsize, then empty the map and insert once more.
with_resize_down = (
    [set_(x, x) for x in range(5)]
    + [delete(x) for x in range(5)]
    + [set_("key_a", "val_b")]
)
57+
58+
59+
@pytest.mark.parametrize(
    "operations",
    (
        pytest.param(add_items, id="add items"),
        pytest.param(overwrite_items, id="overwrite items"),
        pytest.param(delete_items, id="delete items"),
        pytest.param(access_not_existing_items, id="access none existing items"),
        pytest.param(with_resize_up, id="with resize up"),
        pytest.param(with_resize_down, id="with resize down"),
    ),
)
def test_hash_map_is_the_same_as_dict(operations):
    """Replay *operations* on a HashMap and on a dict and compare them.

    After every operation the result, the raised exception (if any), the key
    set, the length and the items of both mappings must agree.
    """
    my = HashMap(initial_block_size=4)
    py = {}
    for fun, *args in operations:
        my_res, my_exc = _run_operation(my, fun, *args)
        py_res, py_exc = _run_operation(py, fun, *args)
        assert my_res == py_res
        # Compare the exception class too: equal messages alone would let a
        # wrong exception type slip through.
        assert type(my_exc) is type(py_exc)
        assert str(my_exc) == str(py_exc)
        assert set(py) == set(my)
        assert len(py) == len(my)
        assert set(my.items()) == set(py.items())
81+
82+
83+
def _run_operation(obj, fun, *args):
84+
try:
85+
return fun(obj, *args), None
86+
except Exception as e:
87+
return None, e

0 commit comments

Comments
 (0)