Source code for schrodinger.analysis.enrichment.plotter

"""
Functions and classes for plotting enrichment job outputs. e.g. basic
Sensitivity v 1-Specificity plots, etc.

Copyright Schrodinger, LLC. All rights reserved.
"""

import warnings

import schrodinger.utils.log as log
import schrodinger.utils.moduleproxy as moduleproxy
from schrodinger.analysis.enrichment import metrics

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    plt = moduleproxy.try_import("matplotlib.pyplot")

logger = log.get_output_logger(__file__)


[docs]def calc_sensitivity_with_rank(total_actives, active_ranks, rank): """ Calculates sensitivity at a particular rank, defined as: Se(rank) = found_actives / total_actives :param total_actives: The number of all active ligands in the screen, ranked and unranked. :type total_actives: int :param active_ranks: List of *unadjusted* integer ranks for the actives found in the screen. For example, a screen result that placed three actives as the first three ranks has an active_ranks list of = [1, 2, 3]. :type active_ranks: list(int) :param rank: Active rank at which to calculate the specificity. :type rank: int :return: Sensitivity of the screen at a given rank. :rtype: float """ # Number of actives found at given rank # offset 0-based index found_actives = active_ranks.index(rank) + 1 return found_actives / float(total_actives)
[docs]def calc_specificity_with_rank(total_actives, total_ligands, active_ranks, rank): """ Calculates specificity at a particular rank, defined as: Sp(rank) = discarded_decoys / total_decoys :param total_actives: The number of all active ligands in the screen, ranked and unranked. :type total_actives: int :param total_ligands: The number of the total number of ligands (actives and unknowns/decoys) used in the screen. :type total_ligands: int :param active_ranks: List of *unadjusted* integer ranks for the actives found in the screen. For example, a screen result that placed three actives as the first three ranks has an active_ranks list of = [1, 2, 3]. :type active_ranks: list(int) :param rank: Active rank at which to calculate the specificity. :type rank: int :return: Specificity of the screen at a given rank. :rtype: float """ total_decoys = total_ligands - total_actives if total_decoys == 0: raise ValueError("calculateSpecificity caught ZeroDivisionError") # Number of actives found at given rank # offset 0-based index found_actives = active_ranks.index(rank) + 1 discarded_decoys = total_decoys - rank + found_actives return discarded_decoys / float(total_decoys)
[docs]def get_percent_screen_curve_points(total_actives, total_ligands, active_ranks): """ :param total_actives: The number of all active ligands in the screen, ranked and unranked. :type total_actives: int :param total_ligands: The number of the total number of ligands (actives and unknowns/decoys) used in the screen. :type total_ligands: int :param active_ranks: List of *unadjusted* integer ranks for the actives found in the screen. For example, a screen result that placed three actives as the first three ranks has an active_ranks list of = [1, 2, 3]. :type active_ranks: list(int) :return: List of (%Screen, %Actives Found) tuples for the active ranks. """ curve_points = [] for act_idx, rank in enumerate(active_ranks, start=1): prcnt_act = float(act_idx) / float(total_actives) * 100.0 prcnt_scrn = float(rank) / float(total_ligands) * 100.0 item = (prcnt_scrn, prcnt_act) logger.debug("getPercentScreenCurvePoints: %s" % str(item)) curve_points.append(item) return curve_points
[docs]def get_plot_data(total_actives, total_ligands, active_ranks, title_ranks): """ :param total_actives: The number of all active ligands in the screen, ranked and unranked. :type total_actives: int :param total_ligands: The number of the total number of ligands (actives and unknowns/decoys) used in the screen. :type total_ligands: int :param active_ranks: List of *unadjusted* integer ranks for the actives found in the screen. For example, a screen result that placed three actives as the first three ranks has an active_ranks list of = [1, 2, 3]. :type active_ranks: list(int) :return: A list of active Title, Rank, Sensitivity, Specificity, %Actives Found, %Screen tuples. :rtype: list :note: This function should only be accessed via Calculator unless we figured how to get title_ranks without an additional O(n) work. :note: This list may grow, but the relative order of the columns should remain fixed. """ headers = [ 'Title', 'Rank', 'Specificity', '1-Specificity', 'Sensitivity', '%Screen', '%Actives Found', ] rows = [] rows.append(headers) for rank_idx, rank in enumerate(sorted(title_ranks), start=1): sp = calc_sensitivity_with_rank(total_actives, active_ranks, rank) prcnt_act = float(rank_idx) / float(total_actives) * 100.0 prcnt_scrn = float(rank) / float(total_ligands) * 100.0 active_data = [ title_ranks[rank], # Title rank, sp, 1.0 - sp, calc_sensitivity_with_rank(total_actives, active_ranks, rank), prcnt_scrn, prcnt_act, ] rows.append(active_data) return rows
[docs]def get_roc_curve_points(total_actives, total_ligands, active_ranks): """ Calculates set of points in ROC curve along each active rank. :param total_actives: The number of all active ligands in the screen, ranked and unranked. :type total_actives: int :param total_ligands: The number of the total number of ligands (actives and unknowns/decoys) used in the screen. :type total_ligands: int :param active_ranks: List of *unadjusted* integer ranks for the actives found in the screen. For example, a screen result that placed three actives as the first three ranks has an active_ranks list of = [1, 2, 3]. :type active_ranks: list(int) :return: List of (1 - specificity, sensitivity, rank) tuples. :rtype: list of tuples """ roc_curve_points = [] for rank in active_ranks: # False positive rate x = 1 - calc_specificity_with_rank(total_actives, total_ligands, active_ranks, rank) # True positive rate y = calc_sensitivity_with_rank(total_actives, active_ranks, rank) roc_curve_points.append((x, y, rank)) return roc_curve_points
[docs]def save_plot(total_actives, total_ligands, active_ranks, adjusted_active_ranks, total_ranked, png_file="plot.png", title='Screen Results', xlabel='1-Specificity', ylabel='Sensitivity'): """ Saves an image of the ROC plot, Sensitivity v 1-Specificity, to a png file. :param total_actives: The number of all active ligands in the screen, ranked and unranked. :type total_actives: int :param total_ligands: The number of the total number of ligands (actives and unknowns/decoys) used in the screen. :type total_ligands: int :param active_ranks: List of *unadjusted* integer ranks for the actives found in the screen. For example, a screen result that placed three actives as the first three ranks has an active_ranks list of = [1, 2, 3]. :type active_ranks: list(int) :param adjusted_active_ranks: Modified active ranks; each rank is improved by the number of preceding actives. For example, a screen result that placed three actives as the first three ranks, [1, 2, 3], has adjusted ranks of [1, 1, 1]. In this way, actives are not penalized by being outranked by other actives. :type adjusted_active_ranks: list(int) :param total_ranked: The number of unique ranked ligands. Deduced from results_file or merged_file. :type total_ranked: int :param png_file: Path to output file, default is 'plot.png'. :type png_file: str :param title: Plot title, default is 'Screen Results'. :type title: str :param xlabel: x-axis label, default is '1-Specificity'. :type xlabel: str :param ylabel: y-axis label, default is 'Sensitivity'. :type ylabel: str """ # FIXME: this method shares a log of code with Plotter.savePlot() # Determine if it is more useful to return a 'figure' # rather than serializing the image. y_points = [0] x_points = [0] for x, y, rank in get_roc_curve_points(total_actives, total_ligands, active_ranks): y_points.append(y) x_points.append(x) msg = "plotROC: rank %d, Se %f, 1-Sp %.2f" % (rank, y, x) logger.debug(msg) plt.xlabel(xlabel) plt.ylabel(ylabel) plt.plot(x_points, y_points, 'bo-') # Reference curve for random performance. sp_ref = [0, 1.0] se_ref = [0, 1.0] plt.plot(sp_ref, se_ref, 'k-') x_points.append(1.0) # Add the top right corner to the shaded area. y_points.append(y) # Add the top right corner to the shaded area. x_points.append(1.0) # Add the bottom right corner to shaded area. y_points.append(0) # Add the bottom right corner to shaded area. plt.grid(True) plt.fill( x_points, y_points, facecolor='blue', edgecolor='r', linewidth=5.0, alpha=0.2 # Transparency. ) plt.title(title) plt.text( 0.30, 0.20, "BEDROC(\alpha=20, \alpha*Ra=%.4f): %.3f" % (metrics.calc_BEDROC( total_actives, total_ligands, active_ranks, alpha=20)[1], metrics.calc_BEDROC( total_actives, total_ligands, active_ranks, alpha=20)[0])) plt.text( 0.30, 0.15, "RIE: %.3f" % metrics.calc_RIE(total_actives, total_ligands, active_ranks, alpha=20)) plt.text( 0.30, 0.10, "ROC: %.3f" % metrics.calc_ROC(total_actives, total_ligands, adjusted_active_ranks)) plt.text( 0.30, 0.05, "AUAC: %.3f" % metrics.calc_AUAC( total_actives, total_ligands, total_ranked, active_ranks)) plt.savefig(png_file) plt.close()
################################################################################ # Classes ################################################################################ class _BasePlotter(object): """ Class on which Plotter and PercentScreenPlotter are based. """ def __init__(self, calculators, title, xlabel, ylabel, xmax, ymax, legend): """ :param calculators: List of Calculator instances. :type calculators: list(calculator.Calculator) :param title: The plot title. :type title: string """ self.calculators = calculators self.title = title self.xlabel = xlabel self.ylabel = ylabel self.legend = legend self.xmax = xmax self.ymax = ymax # TODO now that markers are no longer used, consolidate these into # a single style: self.styles = ['b-', 'b-', 'b-', 'b-', 'b-', 'b-'] self.series_labels = [] self.legend_location = 4 # Lower right. self.legend_font_size = 8 self.alpha = 0.1 # Fill transparency. def getPointsFromCalc(self, calculator): """ Returns points for this metric from the given Calculator instance. Implement this method in subclasses. :param calculator: Calculator instance. :type calculator: calculator.Calculator :return List of (x, y) points :rtype: list """ raise NotImplementedError() def plot(self, indexes=None): """ Launch interactive matplotlib viewer loaded with the plot. :param indexes: List of indexes into self.calcs to include in the plot. If the argument is None then all series are plotted. :type indexes: list """ plot_figure = self.getPlotFigure(indexes) plt.figure(num=plot_figure.number) plt.show() def showPlotWindow(self, win_title): """ Open a window with this plot. :param win_title: Title for the plot window. :type win_title: str """ # These imports are here and not at the top of the module because # many clients of the module don't need this functionality, and because # the import of navtoolbar itself produces annoying warnings when # Maestro is not available. from matplotlib.backends.backend_qtagg import \ FigureCanvasQTAgg as FigureCanvas import schrodinger.ui.qt.navtoolbar as navtoolbar plot_figure = self.getPlotFigure() graph = FigureCanvas(plot_figure) nav_toolbar = navtoolbar.NavToolbar(graph, graph.window(), False) nav_toolbar.lastDir = '.' nav_toolbar.show() graph.setWindowTitle(win_title) graph.show() def savePlot(self, png_file='plot.png'): """ Serialized figure to a png format file. :param png_file: Path to output png file. :type png_file: string :return: None """ plot_figure = self.getPlotFigure() plt.figure(num=plot_figure.number) plt.savefig(png_file) plt.close() def getPlotFigure(self, indexes=None): """ Returns a new pyplot figure the plot. :param indexes: List of indexes into self.calcs to include in the plot. If the argument is None then all series are plotted. :type indexes: list :return: a pyplot figure. """ plot_figure = plt.figure() self.series_labels = [] # Clear any previous labels. if indexes is None: for calculator in self.calculators: self.addSeries(calculator, plot_figure=plot_figure) else: for index in indexes: self.addSeries(self.calcs[index], plot_figure=plot_figure) self.addReferenceSeries(plot_figure=plot_figure) plt.title(self.title) plt.legend(self.series_labels, loc=self.legend_location) ltext = plt.gca().get_legend().get_texts() plt.setp(ltext[0], fontsize=self.legend_font_size) return plot_figure def addSeries(self, calculator, style=None, plot_figure=None): """ :param calculator: Calculator instance. :type calculator: calculator.Calculator :param style: The matplotlib linestyle. If style is None then a style is selected by fetching the next element from self.styles. :type style: string :param plot_figure: A pyplot figure. If None then the current figure is used. :type plot_figure: plt.Figure :return: None """ if plot_figure is None: plot_figure = plt.figure() plt.figure(num=plot_figure.number) if style is None: self.styles = self.styles[1:] + [self.styles[0]] style = self.styles[-1] # Unzip points to list of x and y coordinates, adding endpoints points = [(0.0, 0.0)] points += self.getPointsFromCalc(calculator) points += [(self.xmax, self.ymax)] x, y = list(zip(*points)) # PANEL-8740: Points are changes in True Positive Rate (y-coordinate), # must use "steps-post" to get correct step plot. plt.plot(x, y, style, drawstyle="steps-post") plt.xlim([0.0, self.xmax * 1.05]) plt.ylim([0.0, self.ymax * 1.05]) plt.xlabel(self.xlabel) plt.ylabel(self.ylabel) plt.grid(True) self.series_labels.append(self.legend) def addReferenceSeries(self, style=None, plot_figure=None): """ Adds a diagonal representing random performance. :param style: The matplotlib linestyle. If style is None then 'k-' is used. :type style: string :param plot_figure: A pyplot figure. If None then the current figure is used. :type plot_figure: pyplot.Figure :return: None """ if style is None: style = 'k-' if plot_figure is None: plot_figure = plt.figure() plt.figure(num=plot_figure.number) # Reference curve for random performance. plt.plot([0, self.xmax], [0, self.ymax], '#BFBFBF') self.series_labels.append('Random')
[docs]class Plotter(_BasePlotter): """ A class to plot multiple series of Calculator instances. API example where `enrich_calc1` and `enrich_calc2` are instances of Calculator:: enrich_plotter = plotter.Plotter([enrich_calc1, enrich_calc2]) enrich_plotter.plot() # Launch interactive plot window. enrich_plotter.savePlot('my_plot.png') # Save plot to file. There are six line styles defined by default. Plotting more than six results cycles through the styles. """
[docs] def __init__(self, calculators, title='Screen Results', xlabel='1-Specificity', ylabel='Sensitivity', legend_label='Screen Results'): """ :param calculators: List of Calculator instances. :type calculators: list(calculator.Calculator) :param title: The plot title. :type title: string :param xlabel: The x-axis label. :type xlabel: str :param ylabel: The y-axis label. :type ylabel: str :param legend_label: The legend label. :type legend_label: str """ super(Plotter, self).__init__(calculators, title, xlabel, ylabel, xmax=1.0, ymax=1.0, legend=legend_label)
[docs] def getPointsFromCalc(self, calculator): """ Returns points for this metric from the given Calculator instance. :param calculator: Calculator instance. :type calculator: calculator.Calculator :return List of (x, y) points :rtype: list """ return [(x, y) for x, y, rank in get_roc_curve_points( calculator.total_actives, calculator.total_ligands, calculator.active_ranks)]
[docs]class PercentScreenPlotter(_BasePlotter): """ A class to plot multiple series of Calculator data as %Actives Found vs %Screen. API example where `enrich_calc1` and `enrich_calc2` are instances of Calculator:: enrich_plotter = plotter.PercentScreenPlotter([enrich_calc1, enrich_calc2]) enrich_plotter.plot() # Launch interactive plot window. enrich_plotter.savePlot('my_plot.png') # Save plot to file. There are six line styles defined by default. Plotting more than six results cycles through the styles. """
[docs] def __init__(self, calculators, title='Screen Results', xlabel='Percent Screen', ylabel='Percent Actives Found', legend_label='Screen Results'): """ :param calculators: List of Calculator instances. :type calculators: list(calculator.Calculator) :param title: The plot title. :type title: string :param xlabel: The x-axis label. :type xlabel: str :param ylabel: The y-axis label. :type ylabel: str :param legend_label: The legend label. :type legend_label: str """ super(PercentScreenPlotter, self).__init__(calculators, title, xlabel, ylabel, xmax=100.0, ymax=100.0, legend=legend_label)
[docs] def getPointsFromCalc(self, calculator): """ Returns points for this metric from the given Calculator instance. :param calculator: Calculator instance. :type calculator: calculator.Calculator :return List of (x, y) points :rtype: list """ return get_percent_screen_curve_points(calculator.total_actives, calculator.total_ligands, calculator.active_ranks)