Skip to content

Commit 8f406c8

Browse files
committed
An individual cache, after 3+ prototypes
WIP: Not runnable scratch Doodle Gear towards a different direction WIP Rename to ControllableCache Use new ControllableCache Proof-of-Concept Support runtime storage Rename to IndividualCache Documentation draft Enable http_decorate
1 parent dd6d9a1 commit 8f406c8

File tree

2 files changed

+376
-0
lines changed

2 files changed

+376
-0
lines changed

msal/individual_cache.py

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
from functools import wraps
2+
import time
3+
try:
4+
from collections.abc import MutableMapping # Python 3.3+
5+
except ImportError:
6+
from collections import MutableMapping # Python 2.7+
7+
import heapq
8+
from threading import Lock
9+
10+
11+
class _ExpiringMapping(MutableMapping):
    """A dict-like container whose items each carry their own expiry time.

    Bookkeeping is kept inside the underlying mapping under the reserved key
    ``_INDEX`` as a pair ``(sequence, timestamps)``:

    * ``sequence`` is a heap (see :mod:`heapq`) of
      ``[expires_at, created_at, key]`` entries, soonest-expiring first.
      It may contain stale "orphan" entries for keys that were later
      overwritten or deleted; orphans are recognised and skipped because
      they no longer match ``timestamps``.
    * ``timestamps`` maps each live key to ``[expires_at, created_at]``.

    Note: heap entries that tie on both timestamps are ordered by key,
    so keys stored within the same second need to be mutually comparable.
    """
    _INDEX = "_index_"

    def __init__(self, mapping=None, capacity=None, expires_in=None, lock=None,
            *args, **kwargs):
        """Items in this mapping can have individual shelf life,
        just like food items in your refrigerator have their different shelf life
        determined by each food, not by the refrigerator.

        Expired items will be automatically evicted.
        The clean-up will be done at each time when adding a new item,
        or when looping or counting the entire mapping.
        (This is better than being done indecisively by a background thread,
        which might not always happen before your accessing the mapping.)

        This implementation uses no dependency other than Python standard library.

        :param MutableMapping mapping:
            A dict-like key-value mapping, which needs to support __setitem__(),
            __getitem__(), __delitem__(), get(), pop().

            The default mapping is an in-memory dict.

            You could potentially supply a file-based dict-like object, too.
            This implementation deliberately avoids mapping.__iter__(),
            which could be slow on a file-based mapping.

        :param int capacity:
            How many items this mapping will hold.
            When you attempt to add a new item into a full mapping,
            it will automatically delete the existing item that is
            expiring soonest, to make room for the new one.

            The default value is None, which means there is no capacity limit.

        :param int expires_in:
            How many seconds an item would expire and be purged from this mapping.
            Also known as time-to-live (TTL).
            You can also use :func:`~set()` to provide per-item expires_in value.

        :param Lock lock:
            A locking mechanism with context manager interface.
            If no lock is provided, a threading.Lock will be used.
            But you may want to supply a different lock,
            if your customized mapping is being shared differently.
        """
        super(_ExpiringMapping, self).__init__(*args, **kwargs)
        self._mapping = mapping if mapping is not None else {}
        self._capacity = capacity
        self._expires_in = expires_in
        self._lock = Lock() if lock is None else lock

    def _validate_key(self, key):
        """Raise ValueError if ``key`` collides with the reserved index key."""
        if key == self._INDEX:
            raise ValueError("key {} is a reserved keyword in {}".format(
                key, self.__class__.__name__))

    def set(self, key, value, expires_in):
        # This method's name was chosen so that it matches its cousin __setitem__(),
        # and it also complements the counterpart get().
        # The downside is such a name shadows the built-in type set in this file,
        # but you can overcome that by defining a global alias for set.
        """It sets the key-value pair into this mapping, with its per-item expires_in.

        It will take O(logN) time, because it will run some maintenance.
        This worse-than-constant time is acceptable, because in a cache scenario,
        __setitem__() would only be called during a cache miss,
        which would already incur an expensive target function call anyway.

        By the way, most other methods of this mapping still have O(1) constant time.

        :param key: Any key other than the reserved ``_INDEX``.
        :param value: The value to store.
        :param expires_in: Seconds from now until this item expires.
        :raises ValueError: If ``key`` is the reserved index key.
        """
        with self._lock:
            self._set(key, value, expires_in)

    def _set(self, key, value, expires_in):
        """Store the item and its index entry. Caller must hold the lock."""
        # This internal implementation powers both set() and __setitem__(),
        # so that they don't depend on each other.
        self._validate_key(key)
        sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
        self._maintenance(sequence, timestamps)  # O(logN)
        now = int(time.time())
        expires_at = now + expires_in
        if key not in timestamps:
            # Evict BEFORE pushing the new entry. Doing a combined
            # heappushpop() could pop the new entry itself (when it expires
            # soonest, or when the heap is empty), which would leave the new
            # key stored but missing from the heap, hence never purged.
            self._make_room(sequence, timestamps)
        # When overwriting an existing key, its old heap entry simply becomes
        # a harmless orphan, to be skipped later by _drop_indexed_entry().
        heapq.heappush(sequence, [expires_at, now, key])
        timestamps[key] = [expires_at, now]  # It overwrites existing key, if any
        self._mapping[key] = value
        self._mapping[self._INDEX] = sequence, timestamps

    def _make_room(self, sequence, timestamps):
        """Evict soonest-expiring items until one more item would fit.

        It modifies input sequence and timestamps in-place.
        Orphan heap entries are popped without evicting anything,
        so the loop continues until a live item has actually been dropped.
        """
        if self._capacity is None:  # No capacity limit
            return
        while sequence and len(timestamps) >= self._capacity:
            self._drop_indexed_entry(timestamps, sequence[0])  # It could error out
            heapq.heappop(sequence)  # Only pop it after a successful _drop_indexed_entry()

    def _maintenance(self, sequence, timestamps):  # O(logN)
        """It will modify input sequence and timestamps in-place"""
        now = int(time.time())
        while sequence:  # Clean up expired items
            expires_at, created_at, key = sequence[0]
            if created_at <= now < expires_at:  # Then all remaining items are fresh
                break
            self._drop_indexed_entry(timestamps, sequence[0])  # It could error out
            heapq.heappop(sequence)  # Only pop it after a successful _drop_indexed_entry()
        # Shrink to capacity. The "sequence" guard avoids an IndexError on
        # sequence[0] should the index ever hold keys without heap entries.
        while (self._capacity is not None and sequence
                and len(timestamps) > self._capacity):
            self._drop_indexed_entry(timestamps, sequence[0])  # It could error out
            heapq.heappop(sequence)  # Only pop it after a successful _drop_indexed_entry()

    def _drop_indexed_entry(self, timestamps, entry):
        """For an entry came from index, drop it from timestamps and self._mapping"""
        expires_at, created_at, key = entry
        if [expires_at, created_at] == timestamps.get(key):  # So it is not an orphan
            self._mapping.pop(key, None)  # It could raise exception
            timestamps.pop(key, None)  # This would probably always succeed

    def __setitem__(self, key, value):
        """Implements the __setitem__().

        Same characteristic as :func:`~set()`,
        but use class-wide expires_in which was specified by :func:`~__init__()`.

        :raises ValueError:
            If no class-wide expires_in was given, or key is the reserved key.
        """
        if self._expires_in is None:
            raise ValueError("Need a numeric value for expires_in during __init__()")
        with self._lock:
            self._set(key, value, self._expires_in)

    def __getitem__(self, key):  # O(1)
        """If the item you requested already expires, KeyError will be raised."""
        self._validate_key(key)
        with self._lock:
            # Skip self._maintenance(), because it would need O(logN) time
            sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
            expires_at, created_at = timestamps[key]  # Would raise KeyError accordingly
            now = int(time.time())
            if not created_at <= now < expires_at:
                # Purge this one item now; its heap entry becomes an orphan.
                self._mapping.pop(key, None)
                timestamps.pop(key, None)
                self._mapping[self._INDEX] = sequence, timestamps
                raise KeyError("{} expired".format(key))
            return self._mapping[key]  # O(1)

    def __delitem__(self, key):  # O(1)
        """Delete the item. KeyError will be raised if the key is not indexed.

        No expiry check is performed here,
        so an expired-but-not-yet-purged item is deleted silently.
        """
        self._validate_key(key)
        with self._lock:
            # Skip self._maintenance(), because it would need O(logN) time
            self._mapping.pop(key, None)  # O(1)
            sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
            del timestamps[key]  # O(1). Its heap entry becomes an orphan.
            self._mapping[self._INDEX] = sequence, timestamps

    def __len__(self):  # O(logN)
        """Drop all expired items and return the remaining length"""
        with self._lock:
            sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
            self._maintenance(sequence, timestamps)  # O(logN)
            self._mapping[self._INDEX] = sequence, timestamps
            return len(timestamps)  # Faster than iter(self._mapping) when it is on disk

    def __iter__(self):
        """Drop all expired items and return an iterator of the remaining keys"""
        with self._lock:
            sequence, timestamps = self._mapping.get(self._INDEX, ([], {}))
            self._maintenance(sequence, timestamps)  # O(logN)
            self._mapping[self._INDEX] = sequence, timestamps
            # Iterate over a snapshot of the keys, taken while holding the lock,
            # so that the caller's loop would not be broken by a later
            # set/get/delete which mutates the index dict.
            # Still faster than iter(self._mapping) when it is on disk.
            return iter(list(timestamps))
174+
175+
176+
class _IndividualCache(object):
    # The code structure below can decorate both function and method.
    # It is inspired by https://stackoverflow.com/a/9417088
    # We may potentially switch to build upon
    # https://github.com/micheles/decorator/blob/master/docs/documentation.md#statement-of-the-problem
    def __init__(self, mapping=None, key_maker=None, expires_in=None):
        """Constructs a cache decorator that allows item-by-item control on
        how to cache the return value of the decorated function.

        :param MutableMapping mapping:
            The cached items will be stored inside.
            You'd want to use an ExpiringMapping
            if you plan to utilize the ``expires_in`` behavior.

            If nothing is provided, an in-memory dict will be used,
            but it will provide no expiry functionality.
            The same applies to any mapping without a ``set(key, value,
            expires_in)`` method: values are stored by plain item assignment.

            .. note::

                When using this class as a decorator,
                your mapping needs to be available at "compile" time,
                so it would typically be a global-, module- or class-level mapping::

                    module_mapping = {}

                    @IndividualCache(mapping=module_mapping, ...)
                    def foo():
                        ...

                If you want to use a mapping available only at run-time,
                you have to manually decorate your function at run-time, too::

                    def foo():
                        ...

                    def bar(runtime_mapping):
                        foo = IndividualCache(mapping=runtime_mapping...)(foo)

        :param callable key_maker:
            A callable which should have signature as
            ``lambda function, args, kwargs: "return a string as key"``.

            If key_maker happens to return ``None``, the cache will be bypassed,
            the underlying function will be invoked directly,
            and the invoke result will not be cached either.

        :param expires_in:
            An integer, a callable, or ``None``.

            The default value is ``None``,
            which means the content being cached has no per-item expiry,
            and will subject to the underlying mapping's global expiry time.

            It can be an integer indicating
            how many seconds the result will be cached.
            In particular, if the value is 0,
            it means the result expires after zero second (i.e. immediately),
            therefore the result will *not* be cached.
            (Mind the difference between ``expires_in=0`` and ``expires_in=None``.)

            Or it can be a callable with the signature as
            ``lambda function=function, args=args, kwargs=kwargs, result=result: 123``
            to calculate the expiry on the fly.
            Its return value will be interpreted in the same way as above.
        """
        self._mapping = mapping if mapping is not None else {}
        self._key_maker = key_maker or (lambda function, args, kwargs: (
            function,  # This default implementation uses function as part of key,
                # so that the cache is partitioned by function.
                # However, you could have many functions to use same namespace,
                # so different decorators could share same cache.
            args,
            tuple(kwargs.items()),  # raw kwargs is not hashable
            ))
        self._expires_in = expires_in

    def __call__(self, function):
        """Decorate ``function`` and return the caching wrapper."""

        @wraps(function)
        def wrapper(*args, **kwargs):
            key = self._key_maker(function, args, kwargs)
            if key is None:  # Then bypass the cache
                return function(*args, **kwargs)

            try:
                return self._mapping[key]
            except KeyError:
                # We choose to NOT call function(...) in this block, otherwise
                # potential exception from function(...) would become a confusing
                # "During handling of the above exception, another exception occurred"
                pass
            value = function(*args, **kwargs)

            expires_in = self._expires_in(
                function=function,
                args=args,
                kwargs=kwargs,
                result=value,
                ) if callable(self._expires_in) else self._expires_in
            if expires_in == 0:  # Expires immediately, so do not cache it
                return value
            # A plain dict (which is the default mapping) has no set() method.
            # As documented, such a mapping still caches, just without expiry.
            set_with_expiry = getattr(self._mapping, "set", None)
            if expires_in is None or not callable(set_with_expiry):
                self._mapping[key] = value
            else:
                set_with_expiry(key, value, expires_in)
            return value

        return wrapper
283+

tests/test_individual_cache.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
from time import sleep
2+
from random import random
3+
import unittest
4+
from msal.individual_cache import _ExpiringMapping as ExpiringMapping
5+
from msal.individual_cache import _IndividualCache as IndividualCache
6+
7+
8+
class TestExpiringMapping(unittest.TestCase):
    # Every test gets a fresh mapping with room for 2 items and a default
    # time-to-live of 1 second, backed by a plain dict we can inspect.

    def setUp(self):
        backing_store = {}
        self.mapping = backing_store
        self.m = ExpiringMapping(mapping=backing_store, capacity=2, expires_in=1)

    def _assert_holds_only(self, key, value):
        # Shared checks: the index exists, and the mapping exposes exactly
        # one item, i.e. the given key/value pair.
        self.assertIn(ExpiringMapping._INDEX, self.mapping, "Index created")
        self.assertEqual(1, len(self.m), "It contains one item (excluding index)")
        self.assertEqual(value, self.m[key])
        self.assertEqual([key], list(self.m))

    def test_should_disallow_accessing_reserved_keyword(self):
        self.assertRaises(ValueError, self.m.get, ExpiringMapping._INDEX)

    def test_setitem(self):
        # Item assignment relies on the class-wide expires_in
        self.assertEqual(0, len(self.m))
        self.m["thing one"] = "one"
        self._assert_holds_only("thing one", "one")

    def test_set(self):
        # set() carries its own per-item expires_in
        self.assertEqual(0, len(self.m))
        self.m.set("thing two", "two", 2)
        self._assert_holds_only("thing two", "two")

    def test_len_should_purge(self):
        self.m["thing one"] = "one"
        sleep(1)  # Wait until the item's 1-second shelf life is over
        self.assertEqual(0, len(self.m))

    def test_iter_should_purge(self):
        self.m["thing one"] = "one"
        sleep(1)  # Wait until the item's 1-second shelf life is over
        self.assertEqual([], list(self.m))

    def test_get_should_purge(self):
        self.m["thing one"] = "one"
        sleep(1)  # Wait until the item's 1-second shelf life is over
        self.assertRaises(KeyError, self.m.__getitem__, "thing one")

    def test_various_expiring_time(self):
        self.assertEqual(0, len(self.m))
        self.m["thing one"] = "one"  # Lives for 1 second
        self.m.set("thing two", "two", 2)  # Lives for 2 seconds
        self.assertEqual(2, len(self.m), "It contains 2 items")
        sleep(1)
        self.assertEqual(["thing two"], list(self.m), "One expires, another remains")

    def test_old_item_can_be_updated_with_new_expiry_time(self):
        self.assertEqual(0, len(self.m))
        self.m["thing"] = "one"
        self.m.set("thing", "two", 2)  # Same key, new value, longer life
        self.assertEqual(1, len(self.m), "It contains 1 item")
        self.assertEqual("two", self.m["thing"], 'Already been updated to "two"')
        sleep(1)
        self.assertEqual("two", self.m["thing"], "Not yet expires")
        sleep(1)
        self.assertEqual(0, len(self.m))

    def test_oversized_input_should_purge_most_aging_item(self):
        self.assertEqual(0, len(self.m))
        self.m["thing one"] = "one"
        self.m.set("thing two", "two", 2)
        self.assertEqual(2, len(self.m), "It contains 2 items")
        self.m["thing three"] = "three"  # A third item exceeds the capacity of 2
        self.assertEqual(2, len(self.m), "It contains 2 items")
        self.assertNotIn("thing one", self.m)
76+
77+
78+
class TestIndividualCache(unittest.TestCase):
    mapping = {}  # Class-level, so that it exists when the decorator below runs

    @IndividualCache(mapping=mapping)
    def foo(self, a, b, c=None, d=None):
        # A random return value reveals whether the call was served from cache
        return random()

    def test_memorize_a_function_call(self):
        result_of_1_1 = self.foo(1, 1)
        result_of_2_2 = self.foo(2, 2)
        self.assertNotEqual(result_of_1_1, result_of_2_2)

        first_run = self.foo(1, 2, c=3, d=4)
        second_run = self.foo(1, 2, c=3, d=4)
        self.assertEqual(
            first_run,
            second_run,
            "Subsequent run should obtain same result from cache")
        # Note: In Python 3.7+, dict is ordered, so the following is typically True:
        #self.assertNotEqual(self.foo(a=1, b=2), self.foo(b=2, a=1))
93+

0 commit comments

Comments
 (0)