Source code for schrodinger.application.matsci.cache

"""
Module with functions related to caching.

Copyright Schrodinger, LLC. All rights reserved.
"""
import functools
import inspect
from collections import OrderedDict
from collections import namedtuple
from collections.abc import Hashable
from collections.abc import Iterable
from collections.abc import Mapping

DEFAULT_CACHE_SIZE = 10000
CacheInfo = namedtuple('CacheInfo', ['hit', 'miss', 'maxsize', 'currsize'])


class readonly_cached_property(functools.cached_property):
    """
    A cached property that cannot be set or deleted
    """

    def __set__(self, instance, value):
        msg = f'Assignment of "{self.attrname}" is not supported'
        raise AttributeError(msg)

    def __delete__(self, instance):
        msg = f'Deletion of "{self.attrname}" is not supported'
        raise AttributeError(msg)
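

# A minimal usage sketch (illustration only, not part of the original module);
# the _Geometry class and its coords property are hypothetical. The value is
# still computed lazily and cached, while assignment and deletion are blocked.
def _readonly_cached_property_example():
    class _Geometry:
        @readonly_cached_property
        def coords(self):
            return [0.0, 0.0, 0.0]

    obj = _Geometry()
    assert obj.coords == [0.0, 0.0, 0.0]
    try:
        obj.coords = []
    except AttributeError:
        pass  # Assignment is rejected by __set__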


class frozenset_cached_property(functools.cached_property):
    """
    A cached property that automatically converts the property into a
    frozenset. Useful for ensuring that a `set` property is immutable.
    """

    def __get__(self, *args, **kwargs):
        prop = super().__get__(*args, **kwargs)
        return frozenset(prop)
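

# A minimal usage sketch (illustration only); the _Residue class and its
# atom_names property are hypothetical. The returned value is a frozenset,
# so callers cannot mutate it.
def _frozenset_cached_property_example():
    class _Residue:
        @frozenset_cached_property
        def atom_names(self):
            return {'CA', 'CB', 'CA'}

    obj = _Residue()
    names = obj.atom_names
    assert isinstance(names, frozenset)
    assert names == frozenset({'CA', 'CB'})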


def _get_cache_name(func):
    """
    Gets the cache name for the passed function

    :param func: The function for which the cache is generated
    :type func: function

    :returns: The cache name
    :rtype: str
    """
    return f'_{func.__name__}_cache'


def get_bound_cache(bound_func):
    """
    Gets the cache data associated with the bound method

    :param bound_func: The bound function
    :type bound_func: function

    :returns: Cache data associated with the bound method, or None if no
        cache has been created yet
    :rtype: BaseCacheData
    """
    cache_name = _get_cache_name(bound_func)
    return getattr(bound_func.__self__, cache_name, None)


class BaseCacheData(OrderedDict):
    """
    Base limited size dictionary used for storing data
    """

    def __init__(self, maxsize=DEFAULT_CACHE_SIZE, *args, **kwargs):
        """
        Constructs a new BaseCacheData

        :param maxsize: The maxsize for the dictionary. The base class does
            not enforce the maxsize. The subclass needs to implement the
            size-limiting behavior.
        :type maxsize: int
        """
        self.maxsize = maxsize
        self._get = 0
        self._set = 0
        super().__init__(*args, **kwargs)

    def __getitem__(self, key):
        """
        Get the value for the passed key

        :param key: The key
        :type key: hashable

        :return: The value for the given key
        :rtype: any
        """
        value = super().__getitem__(key)
        self._get += 1
        return value

    def __setitem__(self, key, value):
        """
        Set the value for the passed key

        :param key: The key
        :type key: hashable

        :param value: The value to store for the key
        :type value: any
        """
        self._set += 1
        super().__setitem__(key, value)

    @property
    def info(self):
        """
        Get the cache data usage information

        :rtype: CacheInfo
        :returns: Information for cache data usage
        """
        return CacheInfo(self._get, self._set, self.maxsize, len(self))

    def clear(self):
        """
        Clears the cache data
        """
        self._get = 0
        self._set = 0
        super().clear()
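

# A minimal sketch (illustration only, not part of the original module) of the
# bookkeeping in BaseCacheData: ``info`` reports the number of successful item
# reads and of item writes, and ``clear`` resets both counters. The base class
# itself never evicts entries when maxsize is exceeded.
def _base_cache_data_example():
    cache = BaseCacheData(maxsize=2)
    cache['a'] = 1
    cache['b'] = 2
    _ = cache['a']
    info = cache.info
    assert (info.hit, info.miss, info.currsize) == (1, 2, 2)
    cache.clear()
    assert cache.info.hit == 0 and len(cache) == 0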


class LRUCacheData(BaseCacheData):
    """
    Limited size dictionary where the least-recently-accessed key is removed
    first when the maxsize is reached
    """

    def __getitem__(self, key):
        """
        Get the value for the passed key

        :param key: The key
        :type key: hashable

        :return: The value for the given key
        :rtype: any
        """
        value = super().__getitem__(key)
        self.move_to_end(key)
        return value

    def __setitem__(self, key, value):
        """
        Set the value for the passed key

        :param key: The key
        :type key: hashable

        :param value: The value to store for the key
        :type value: any
        """
        super().__setitem__(key, value)
        if len(self) > self.maxsize:
            oldest = next(iter(self))
            del self[oldest]
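

# A minimal sketch (illustration only) of the LRU eviction above: reading 'a'
# marks it as most recently used, so 'b' is the key dropped when the size
# limit is exceeded.
def _lru_cache_data_example():
    cache = LRUCacheData(maxsize=2)
    cache['a'] = 1
    cache['b'] = 2
    _ = cache['a']  # 'a' becomes the most recently used key
    cache['c'] = 3  # exceeds maxsize, evicting 'b'
    assert 'b' not in cache and set(cache) == {'a', 'c'}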


class FIFOCacheData(BaseCacheData):
    """
    Limited size dictionary where the first key in is removed first when the
    maxsize is reached
    """

    def __setitem__(self, key, value):
        """
        Set the value for the passed key

        :param key: The key
        :type key: hashable

        :param value: The value to store for the key
        :type value: any
        """
        super().__setitem__(key, value)
        # Maintain size limit for the cache
        if len(self) > self.maxsize:
            self.popitem(last=False)
            # popitem calls __getitem__, but we should not increase the get
            # count for this call since it's an internal call
            self._get -= 1
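

# A minimal sketch (illustration only) of the FIFO eviction above: unlike the
# LRU cache, reading 'a' does not protect it, so the first key inserted is
# still the first key dropped.
def _fifo_cache_data_example():
    cache = FIFOCacheData(maxsize=2)
    cache['a'] = 1
    cache['b'] = 2
    _ = cache['a']  # access order is ignored by FIFO eviction
    cache['c'] = 3  # exceeds maxsize, evicting 'a'
    assert 'a' not in cache and set(cache) == {'b', 'c'}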


def _make_hashable(val):
    """
    Convert a non-hashable value to a hashable one

    :param val: The value to make hashable
    :type val: any

    :raises TypeError: When the value cannot be converted to a hashable form

    :returns: The value that is hashable
    :rtype: hashable
    """
    if val is None or isinstance(val, str):
        return val
    elif isinstance(val, Mapping):
        return tuple((k, _make_hashable(v)) for k, v in val.items())
    elif isinstance(val, Iterable):
        return tuple(_make_hashable(x) for x in val)
    elif not isinstance(val, Hashable):
        raise TypeError(f'unhashable type: {type(val)}')
    else:
        return val
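

# A small illustration (not part of the original module) of how nested
# argument values are flattened into hashable tuples before being used as
# cache keys; the example values are arbitrary.
def _make_hashable_example():
    key = _make_hashable({'atoms': [1, 2, 3], 'name': 'water'})
    assert key == (('atoms', (1, 2, 3)), ('name', 'water'))
    hash(key)  # succeeds because every element is now hashable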


def memoized_method(cache_type=FIFOCacheData, maxsize=DEFAULT_CACHE_SIZE):
    """
    Decorator for caching class methods. Unlike functools.lru_cache, the cache
    is stored on the instance itself, so no external reference to the instance
    is kept; it is meant for methods rather than plain functions.

    Note that, like functools.lru_cache, the values of the method's arguments
    must be hashable (or convertible to a hashable form).

    :param cache_type: The cache data type
    :type cache_type: BaseCacheData

    :param maxsize: The maximum size of the cache
    :type maxsize: int
    """

    def decorator(func):

        def wrapper(*args, **kwargs):
            # Generate cache key
            sig = inspect.signature(func)
            bound = sig.bind(*args, **kwargs)
            parent_obj = bound.arguments.pop('self', None)
            # Try to convert argument values to hashable format
            cache_key = _make_hashable(bound.arguments)
            # Create cache data and save it to the parent object
            cache_var_name = _get_cache_name(func)
            if not hasattr(parent_obj, cache_var_name):
                setattr(parent_obj, cache_var_name, cache_type(maxsize))
            # Get result from cache data
            cache_data = getattr(parent_obj, cache_var_name)
            if cache_key in cache_data:
                return cache_data[cache_key]
            else:
                func_val = func(*args, **kwargs)
                cache_data[cache_key] = func_val
                return func_val

        # Set the wrapper name as the passed function name
        wrapper.__name__ = func.__name__
        return wrapper

    return decorator
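

# A minimal end-to-end sketch (illustration only; the _Molecule class and its
# formula_length method are hypothetical) showing memoized_method together
# with get_bound_cache: the second call is answered from the per-instance
# cache, which can be retrieved and inspected through the bound method.
def _memoized_method_example():
    class _Molecule:

        def __init__(self):
            self.calls = 0

        @memoized_method(cache_type=LRUCacheData, maxsize=100)
        def formula_length(self, formula):
            self.calls += 1
            return len(formula)

    mol = _Molecule()
    assert mol.formula_length('H2O') == 3
    assert mol.formula_length('H2O') == 3  # served from the per-instance cache
    assert mol.calls == 1
    cache = get_bound_cache(mol.formula_length)
    assert isinstance(cache, LRUCacheData) and cache.info.currsize == 1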