Source code for schrodinger.math.mathutils

"""
Contains math-related utility functions

Copyright Schrodinger, LLC. All rights reserved.
"""

from collections import defaultdict
from math import floor
from math import log10

import numpy
from scipy import interpolate
from sklearn import cluster
from sklearn import metrics


[docs]def round_value(val, precision=3, significant_figures=None): """ Return val as a string with the required precision or significant figures. Either precision or significant_figures should be provided. Uses scientific notation for very large or small values, if the precision allows. :param float val: The value to round :param int precision: The precision needed after rounding. A precision of 2 means two decimal places should be kept after rounding. A negative precision indicates how many digits can be replaced with zeros before the decimal point. -5 means that 123456 can be shown as 1e5, -3 means it can be shown as 1.23e5. :param int significant_figures: The number of significant figures that should remain after rounding. If provided, determines the rounding precision. :rtype: str or None :return: A string with the required precision, or None if the input is a string """ if isinstance(val, str): return None val = float(val) if val == 0: return '0' magnitude = int(numpy.floor(numpy.log10(abs(val)))) if significant_figures: precision = significant_figures - magnitude - 1 # Uses scientific notation if more than 4 digits can be removed in large numbers if magnitude < -3 or (magnitude > 3 and precision < -3): decimals = max(0, magnitude + precision) return f'{val:.{decimals}e}' elif precision > 0: return f'{val:.{precision}f}' else: return str(round(val))
[docs]def deduplicate_xy_data(x_vals, y_vals): """ Remove duplicate x values by averaging the y values for them. :param list x_vals: The x values :param list y_vals: The y values :rtype: list, list :return: The deduplicated xy data """ all_vals = defaultdict(list) for x_val, y_val in zip(x_vals, y_vals): all_vals[x_val].append(y_val) deduped_x = list(all_vals.keys()) deduped_y = [sum(all_vals[x]) / len(all_vals[x]) for x in deduped_x] return deduped_x, deduped_y
[docs]class Interpolate1D: """ Creates a map between values in a source and a target axis, allowing to get the equivalent target point for each source point. Wrapper around `scipy.interpolate.interp1d` to allow logarithmic interpolation or extrapolation. """
[docs] def __init__(self, source_vals, target_vals, log_interp=False): """ Create an instance. :param tuple source_vals: The values of points in the source range :param tuple target_vals: The values of points in the target range :param bool log_interp: Whether the interpolation is logarithmic. If False, linear interpolation will be used. """ if log_interp: target_vals = [numpy.log10(x) for x in target_vals] linear_interp = interpolate.interp1d(source_vals, target_vals, fill_value='extrapolate') self.interp = lambda x: numpy.power(10, linear_interp(x)) else: self.interp = interpolate.interp1d(source_vals, target_vals, fill_value='extrapolate')
def __call__(self, source_val): """ Get the equivalent target value of the passed source value :param float source_val: The value in the source range :rtype: float :return: The equivalent value in the target range """ # The return value of interp is 'numpy.ndarray' so convert to float return float(self.interp(source_val))
[docs]class Interpolate2D: """ Creates two instances of Interpolate1D to map values between two source axes and two target axes. Example use case is mapping QGraphicsScene/QChart XY coordinates to a XY coordinate system being displayed in the scene/chart. The two axes need to be independent. """
[docs] def __init__(self, x_source_vals, x_target_vals, y_source_vals, y_target_vals, x_log_interp=False, y_log_interp=False): """ Create an instance. :param tuple x_source_vals: The values of points in the X source range :param tuple x_target_vals: The values of points in the X target range :param tuple y_source_vals: The values of points in the Y source range :param tuple y_target_vals: The values of points in the Y target range :param bool x_log_interp: Whether the X axis interpolation is logarithmic :param bool y_log_interp: Whether the Y axis interpolation is logarithmic """ self.x_interp = Interpolate1D(x_source_vals, x_target_vals, x_log_interp) self.y_interp = Interpolate1D(y_source_vals, y_target_vals, y_log_interp)
def __call__(self, source_x_val, source_y_val): """ Get the equivalent target values of the passed source values :param float source_x_val: The X value in the source range :param float source_y_val: The Y value in the source range :rtype: float, float :return: The equivalent x, y values in the target range """ return (self.x_interp(source_x_val), self.y_interp(source_y_val))
[docs]def roundup(inp): """ Round away from zero (to +/-infinity). :type inp: float or numpy.array :param inp: value to be rounded :rtype: float or numpy.array :return: Rounded value """ # This is NOT default behavior of python3, see also: # mail.python.org/pipermail/numpy-discussion/2014-October/071302.html return numpy.trunc(inp + numpy.copysign(.5, inp))
[docs]def sig_fig_round(value, significant_figure=5): """ :type value: float :param value: change the significant figures of this value :type significant_figure: int :param significant_figure: significant figure of the displayed value :rtype: str :return: str representation of a number with proper significant figures """ sig_fig_fmt = '%.{}g'.format(significant_figure) return sig_fig_fmt % value
[docs]def get_significant_figures(number): """ Get significant digits of a number :type number: float or int or str :param number: number to find significant digits :rtype: int :return: number of significant digits """ sig_fig = 0 check_first = True decimal = False track_zero = 0 for digit in str(number): if digit == '.': decimal = True continue if digit == '0' and check_first: continue else: check_first = False sig_fig += 1 if digit == '0': track_zero += 1 else: track_zero = 0 if digit == 'E' or digit == 'e' or digit == 'x': sig_fig -= 1 break if not decimal: sig_fig -= track_zero return sig_fig
[docs]def set_significant_figures(number, sig_fig): """ Set significant digits of a number :type number: float or int :param number: number to set significant digits :type sig_fig: int :param sig_fig: number of significant digits :rtype: float :return: number with desired significant digits """ round_to = sig_fig - int(floor(log10(abs(number)))) - 1 return numpy.round(number, round_to)
[docs]def polyfit(xdata, ydata, degree): """ Fit a polynomial of rang degree to data. :param numpy.array xdata: X data :param numpy.array ydata: Y data :param int degree: Degree of the fitting polynomial :rtype: numpy.array, float :return: Fitting coefficients, coefficient of determination (R^2) """ coeffs = numpy.polyfit(xdata, ydata, degree) # Get polynomial polynomial = numpy.poly1d(coeffs) # fit values, and mean yhat = polynomial(xdata) ybar = numpy.sum(ydata) / len(ydata) ssreg = numpy.sum((yhat - ybar)**2) sstot = numpy.sum((ydata - ybar)**2) return coeffs, ssreg / sstot
[docs]def dbscan_cluster(objects, eps, key=None): """ Cluster the given objects by numbers obtained via the given key function using a specified precision. Uses simple interface to sklearn.cluster.DBSCAN (density based spatial clustering). :type objects: list :param objects: the objects to cluster :type eps: float :param eps: the precision that controls the size of the clusters, for example if objects is [102, 104, 307, 309, 99, 919, 918] an eps of 6 will produce 3 clusters, see sklearn.cluster.DBSCAN documentation for more details :type key: function or None :param key: the function to get a number from the object, if None is the identity function :raise ValueError: if there is an issue :rtype: dict :return: clustered objects, keys are arbitrary cluster indices, values are lists of objects for the given cluster """ if not key: key = lambda x: x # do not use a dict here in case objects is a list of numbers # with degeneracies, for example [1.1, 1.1, 1.1, 4.5, 4.6, 8.9] numbers = numpy.array([key(x) for x in objects]).reshape(-1, 1) dist_matrix = metrics.pairwise_distances(numbers) labels = cluster.DBSCAN(eps=eps, min_samples=1, metric='precomputed').fit(dist_matrix).labels_ if -1 in labels: raise ValueError('Clustering failed.') groups = {} for label, obj in zip(labels, objects): groups.setdefault(label, []).append(obj) return groups