|
| 1 | +from functools import wraps |
| 2 | +import time |
| 3 | +try: |
| 4 | + from collections.abc import MutableMapping # Python 3.3+ |
| 5 | +except ImportError: |
| 6 | + from collections import MutableMapping # Python 2.7+ |
| 7 | +import heapq |
| 8 | +from threading import Lock |
| 9 | + |
| 10 | + |
| 11 | +class _ExpiringMapping(MutableMapping): |
| 12 | + _INDEX = "_index_" |
| 13 | + |
| 14 | + def __init__(self, mapping=None, capacity=None, expires_in=None, lock=None, |
| 15 | + *args, **kwargs): |
| 16 | + """Items in this mapping can have individual shelf life, |
| 17 | + just like food items in your refrigerator have their different shelf life |
| 18 | + determined by each food, not by the refrigerator. |
| 19 | +
|
| 20 | + Expired items will be automatically evicted. |
| 21 | + The clean-up will be done at each time when adding a new item, |
| 22 | + or when looping or counting the entire mapping. |
| 23 | + (This is better than being done indecisively by a background thread, |
| 24 | + which might not always happen before your accessing the mapping.) |
| 25 | +
|
| 26 | + This implementation uses no dependency other than Python standard library. |
| 27 | +
|
| 28 | + :param MutableMapping mapping: |
| 29 | + A dict-like key-value mapping, which needs to support __setitem__(), |
| 30 | + __getitem__(), __delitem__(), get(), pop(). |
| 31 | +
|
| 32 | + The default mapping is an in-memory dict. |
| 33 | +
|
| 34 | + You could potentially supply a file-based dict-like object, too. |
| 35 | + This implementation deliberately avoid mapping.__iter__(), |
| 36 | + which could be slow on a file-based mapping. |
| 37 | +
|
| 38 | + :param int capacity: |
| 39 | + How many items this mapping will hold. |
| 40 | + When you attempt to add new item into a full mapping, |
| 41 | + it will automatically delete the item that is expiring soonest. |
| 42 | +
|
| 43 | + The default value is None, which means there is no capacity limit. |
| 44 | +
|
| 45 | + :param int expires_in: |
| 46 | + How many seconds an item would expire and be purged from this mapping. |
| 47 | + Also known as time-to-live (TTL). |
| 48 | + You can also use :func:`~set()` to provide per-item expires_in value. |
| 49 | +
|
| 50 | + :param Lock lock: |
| 51 | + A locking mechanism with context manager interface. |
| 52 | + If no lock is provided, a threading.Lock will be used. |
| 53 | + But you may want to supply a different lock, |
| 54 | + if your customized mapping is being shared differently. |
| 55 | + """ |
| 56 | + super(_ExpiringMapping, self).__init__(*args, **kwargs) |
| 57 | + self._mapping = mapping if mapping is not None else {} |
| 58 | + self._capacity = capacity |
| 59 | + self._expires_in = expires_in |
| 60 | + self._lock = Lock() if lock is None else lock |
| 61 | + |
| 62 | + def _validate_key(self, key): |
| 63 | + if key == self._INDEX: |
| 64 | + raise ValueError("key {} is a reserved keyword in {}".format( |
| 65 | + key, self.__class__.__name__)) |
| 66 | + |
| 67 | + def set(self, key, value, expires_in): |
| 68 | + # This method's name was chosen so that it matches its cousin __setitem__(), |
| 69 | + # and it also complements the counterpart get(). |
| 70 | + # The downside is such a name shadows the built-in type set in this file, |
| 71 | + # but you can overcome that by defining a global alias for set. |
| 72 | + """It sets the key-value pair into this mapping, with its per-item expires_in. |
| 73 | +
|
| 74 | + It will take O(logN) time, because it will run some maintenance. |
| 75 | + This worse-than-constant time is acceptable, because in a cache scenario, |
| 76 | + __setitem__() would only be called during a cache miss, |
| 77 | + which would already incur an expensive target function call anyway. |
| 78 | +
|
| 79 | + By the way, most other methods of this mapping still have O(1) constant time. |
| 80 | + """ |
| 81 | + with self._lock: |
| 82 | + self._set(key, value, expires_in) |
| 83 | + |
| 84 | + def _set(self, key, value, expires_in): |
| 85 | + # This internal implementation powers both set() and __setitem__(), |
| 86 | + # so that they don't depend on each other. |
| 87 | + self._validate_key(key) |
| 88 | + sequence, timestamps = self._mapping.get(self._INDEX, ([], {})) |
| 89 | + self._maintenance(sequence, timestamps) # O(logN) |
| 90 | + now = int(time.time()) |
| 91 | + expires_at = now + expires_in |
| 92 | + entry = [expires_at, now, key] |
| 93 | + is_new_item = key not in timestamps |
| 94 | + is_beyond_capacity = self._capacity and len(timestamps) >= self._capacity |
| 95 | + if is_new_item and is_beyond_capacity: |
| 96 | + self._drop_indexed_entry(timestamps, heapq.heappushpop(sequence, entry)) |
| 97 | + else: # Simply add new entry. The old one would become a harmless orphan. |
| 98 | + heapq.heappush(sequence, entry) |
| 99 | + timestamps[key] = [expires_at, now] # It overwrites existing key, if any |
| 100 | + self._mapping[key] = value |
| 101 | + self._mapping[self._INDEX] = sequence, timestamps |
| 102 | + |
| 103 | + def _maintenance(self, sequence, timestamps): # O(logN) |
| 104 | + """It will modify input sequence and timestamps in-place""" |
| 105 | + now = int(time.time()) |
| 106 | + while sequence: # Clean up expired items |
| 107 | + expires_at, created_at, key = sequence[0] |
| 108 | + if created_at <= now < expires_at: # Then all remaining items are fresh |
| 109 | + break |
| 110 | + self._drop_indexed_entry(timestamps, sequence[0]) # It could error out |
| 111 | + heapq.heappop(sequence) # Only pop it after a successful _drop_indexed_entry() |
| 112 | + while self._capacity is not None and len(timestamps) > self._capacity: |
| 113 | + self._drop_indexed_entry(timestamps, sequence[0]) # It could error out |
| 114 | + heapq.heappop(sequence) # Only pop it after a successful _drop_indexed_entry() |
| 115 | + |
| 116 | + def _drop_indexed_entry(self, timestamps, entry): |
| 117 | + """For an entry came from index, drop it from timestamps and self._mapping""" |
| 118 | + expires_at, created_at, key = entry |
| 119 | + if [expires_at, created_at] == timestamps.get(key): # So it is not an orphan |
| 120 | + self._mapping.pop(key, None) # It could raise exception |
| 121 | + timestamps.pop(key, None) # This would probably always succeed |
| 122 | + |
| 123 | + def __setitem__(self, key, value): |
| 124 | + """Implements the __setitem__(). |
| 125 | +
|
| 126 | + Same characteristic as :func:`~set()`, |
| 127 | + but use class-wide expires_in which was specified by :func:`~__init__()`. |
| 128 | + """ |
| 129 | + if self._expires_in is None: |
| 130 | + raise ValueError("Need a numeric value for expires_in during __init__()") |
| 131 | + with self._lock: |
| 132 | + self._set(key, value, self._expires_in) |
| 133 | + |
| 134 | + def __getitem__(self, key): # O(1) |
| 135 | + """If the item you requested already expires, KeyError will be raised.""" |
| 136 | + self._validate_key(key) |
| 137 | + with self._lock: |
| 138 | + # Skip self._maintenance(), because it would need O(logN) time |
| 139 | + sequence, timestamps = self._mapping.get(self._INDEX, ([], {})) |
| 140 | + expires_at, created_at = timestamps[key] # Would raise KeyError accordingly |
| 141 | + now = int(time.time()) |
| 142 | + if not created_at <= now < expires_at: |
| 143 | + self._mapping.pop(key, None) |
| 144 | + timestamps.pop(key, None) |
| 145 | + self._mapping[self._INDEX] = sequence, timestamps |
| 146 | + raise KeyError("{} expired".format(key)) |
| 147 | + return self._mapping[key] # O(1) |
| 148 | + |
| 149 | + def __delitem__(self, key): # O(1) |
| 150 | + """If the item you requested already expires, KeyError will be raised.""" |
| 151 | + self._validate_key(key) |
| 152 | + with self._lock: |
| 153 | + # Skip self._maintenance(), because it would need O(logN) time |
| 154 | + self._mapping.pop(key, None) # O(1) |
| 155 | + sequence, timestamps = self._mapping.get(self._INDEX, ([], {})) |
| 156 | + del timestamps[key] # O(1) |
| 157 | + self._mapping[self._INDEX] = sequence, timestamps |
| 158 | + |
| 159 | + def __len__(self): # O(logN) |
| 160 | + """Drop all expired items and return the remaining length""" |
| 161 | + with self._lock: |
| 162 | + sequence, timestamps = self._mapping.get(self._INDEX, ([], {})) |
| 163 | + self._maintenance(sequence, timestamps) # O(logN) |
| 164 | + self._mapping[self._INDEX] = sequence, timestamps |
| 165 | + return len(timestamps) # Faster than iter(self._mapping) when it is on disk |
| 166 | + |
| 167 | + def __iter__(self): |
| 168 | + """Drop all expired items and return an iterator of the remaining items""" |
| 169 | + with self._lock: |
| 170 | + sequence, timestamps = self._mapping.get(self._INDEX, ([], {})) |
| 171 | + self._maintenance(sequence, timestamps) # O(logN) |
| 172 | + self._mapping[self._INDEX] = sequence, timestamps |
| 173 | + return iter(timestamps) # Faster than iter(self._mapping) when it is on disk |
| 174 | + |
| 175 | + |
| 176 | +class _IndividualCache(object): |
| 177 | + # The code structure below can decorate both function and method. |
| 178 | + # It is inspired by https://stackoverflow.com/a/9417088 |
| 179 | + # We may potentially switch to build upon |
| 180 | + # https://github.com/micheles/decorator/blob/master/docs/documentation.md#statement-of-the-problem |
| 181 | + def __init__(self, mapping=None, key_maker=None, expires_in=None): |
| 182 | + """Constructs a cache decorator that allows item-by-item control on |
| 183 | + how to cache the return value of the decorated function. |
| 184 | +
|
| 185 | + :param MutableMapping mapping: |
| 186 | + The cached items will be stored inside. |
| 187 | + You'd want to use a ExpiringMapping |
| 188 | + if you plan to utilize the ``expires_in`` behavior. |
| 189 | +
|
| 190 | + If nothing is provided, an in-memory dict will be used, |
| 191 | + but it will provide no expiry functionality. |
| 192 | +
|
| 193 | + .. note:: |
| 194 | +
|
| 195 | + When using this class as a decorator, |
| 196 | + your mapping needs to be available at "compile" time, |
| 197 | + so it would typically be a global-, module- or class-level mapping:: |
| 198 | +
|
| 199 | + module_mapping = {} |
| 200 | +
|
| 201 | + @IndividualCache(mapping=module_mapping, ...) |
| 202 | + def foo(): |
| 203 | + ... |
| 204 | +
|
| 205 | + If you want to use a mapping available only at run-time, |
| 206 | + you have to manually decorate your function at run-time, too:: |
| 207 | +
|
| 208 | + def foo(): |
| 209 | + ... |
| 210 | +
|
| 211 | + def bar(runtime_mapping): |
| 212 | + foo = IndividualCache(mapping=runtime_mapping...)(foo) |
| 213 | +
|
| 214 | + :param callable key_maker: |
| 215 | + A callable which should have signature as |
| 216 | + ``lambda function, args, kwargs: "return a string as key"``. |
| 217 | +
|
| 218 | + If key_maker happens to return ``None``, the cache will be bypassed, |
| 219 | + the underlying function will be invoked directly, |
| 220 | + and the invoke result will not be cached either. |
| 221 | +
|
| 222 | + :param callable expires_in: |
| 223 | + The default value is ``None``, |
| 224 | + which means the content being cached has no per-item expiry, |
| 225 | + and will subject to the underlying mapping's global expiry time. |
| 226 | +
|
| 227 | + It can be an integer indicating |
| 228 | + how many seconds the result will be cached. |
| 229 | + In particular, if the value is 0, |
| 230 | + it means the result expires after zero second (i.e. immediately), |
| 231 | + therefore the result will *not* be cached. |
| 232 | + (Mind the difference between ``expires_in=0`` and ``expires_in=None``.) |
| 233 | +
|
| 234 | + Or it can be a callable with the signature as |
| 235 | + ``lambda function=function, args=args, kwargs=kwargs, result=result: 123`` |
| 236 | + to calculate the expiry on the fly. |
| 237 | + Its return value will be interpreted in the same way as above. |
| 238 | + """ |
| 239 | + self._mapping = mapping if mapping is not None else {} |
| 240 | + self._key_maker = key_maker or (lambda function, args, kwargs: ( |
| 241 | + function, # This default implementation uses function as part of key, |
| 242 | + # so that the cache is partitioned by function. |
| 243 | + # However, you could have many functions to use same namespace, |
| 244 | + # so different decorators could share same cache. |
| 245 | + args, |
| 246 | + tuple(kwargs.items()), # raw kwargs is not hashable |
| 247 | + )) |
| 248 | + self._expires_in = expires_in |
| 249 | + |
| 250 | + def __call__(self, function): |
| 251 | + |
| 252 | + @wraps(function) |
| 253 | + def wrapper(*args, **kwargs): |
| 254 | + key = self._key_maker(function, args, kwargs) |
| 255 | + if key is None: # Then bypass the cache |
| 256 | + return function(*args, **kwargs) |
| 257 | + |
| 258 | + now = int(time.time()) |
| 259 | + try: |
| 260 | + return self._mapping[key] |
| 261 | + except KeyError: |
| 262 | + # We choose to NOT call function(...) in this block, otherwise |
| 263 | + # potential exception from function(...) would become a confusing |
| 264 | + # "During handling of the above exception, another exception occurred" |
| 265 | + pass |
| 266 | + value = function(*args, **kwargs) |
| 267 | + |
| 268 | + expires_in = self._expires_in( |
| 269 | + function=function, |
| 270 | + args=args, |
| 271 | + kwargs=kwargs, |
| 272 | + result=value, |
| 273 | + ) if callable(self._expires_in) else self._expires_in |
| 274 | + if expires_in == 0: |
| 275 | + return value |
| 276 | + if expires_in is None: |
| 277 | + self._mapping[key] = value |
| 278 | + else: |
| 279 | + self._mapping.set(key, value, expires_in) |
| 280 | + return value |
| 281 | + |
| 282 | + return wrapper |
| 283 | + |
0 commit comments