Source code for schrodinger.analysis.enrichment.calculator

"""
This module contains the class for generating the default enrichment report.

Example metrics from two different screens:

The enrichment metrics from example_A are generally more favorable than
those from example_B.

Enrichment Report
-----------------

Actives file: example_A_actives.txt
Results: example_A_dock_pv.rept
Total actives: 117
Total ligands(actives+decoys): 1117
Number of ranked actives: 117

BEDROC(alpha=160.9, alpha*Ra=16.8534): 1.000
BEDROC(alpha=20.0, alpha*Ra=2.0949): 0.914
BEDROC(alpha=8.0, alpha*Ra=0.8380): 0.868
ROC: 0.92
RIE: 7.65
Area under accumulation curve: 0.87
Ave. Number of outranking decoys: 82

Count and percentage of actives in top N% of decoy results.
# Actives (1%|2%|5%|10%|20%):    90|   90|   92|   94|   97
% Actives (1%|2%|5%|10%|20%):  76.9| 76.9| 78.6| 80.3| 82.9

Enrichment Factors with respect to N% sample size.
EF (1%|2%|5%|10%|20%):     9.5|    9.5|    9.4|    7.7|    4.1
EF*(1%|2%|5%|10%|20%):      77|     38|     16|      8|    4.1
EF'(1%|2%|5%|10%|20%): 2.9e+02|1.7e+02|     54|     23|    9.9
Eff(1%|2%|5%|10%|20%):   0.974|  0.949|   0.88|  0.779|  0.611

Enrichment Factors with respect to N% actives recovered.
EF (40%|50%|60%|70%|80%|90%|100%):   9.3|  9.4|  9.4|  9.2|  5.7|    2|  1.3
EF*(40%|50%|60%|70%|80%|90%|100%): 4e+02|5e+02|6e+02|2.3e+02|   13|  2.2|  1.4
EF'(40%|50%|60%|70%|80%|90%|100%): 3.8e+02|4.3e+02|4.7e+02|4.3e+02|   38|  4.7|  2.7
FOD(40%|50%|60%|70%|80%|90%|100%): 9e-05|0.0003|0.0004|0.0006|0.003| 0.03| 0.08


Enrichment Report
-----------------

Actives file: example_B_actives.txt
Results: example_B_dock_pv.rept
Total actives: 62
Total ligands(actives+decoys): 1062
Number of ranked actives: 62

BEDROC(alpha=160.9, alpha*Ra=9.3934): 0.703
BEDROC(alpha=20.0, alpha*Ra=1.1676): 0.256
BEDROC(alpha=8.0, alpha*Ra=0.4670): 0.323
ROC: 0.72
RIE: 3.02
Area under accumulation curve: 0.71
Ave. Number of outranking decoys: 281

Count and percentage of actives in top N% of decoy results.
# Actives (1%|2%|5%|10%|20%):     8|    8|    9|   13|   23
% Actives (1%|2%|5%|10%|20%):  12.9| 12.9| 14.5| 21.0| 37.1

Enrichment Factors with respect to N% sample size.
EF (1%|2%|5%|10%|20%):      12|    6.5|    2.9|    2.1|    1.6
EF*(1%|2%|5%|10%|20%):      13|    6.5|    2.9|    2.1|    1.9
EF'(1%|2%|5%|10%|20%):      23|     12|    5.3|    3.4|    2.3
Eff(1%|2%|5%|10%|20%):   0.856|  0.732|  0.488|  0.354|  0.299

Enrichment Factors with respect to N% actives recovered.
EF (40%|50%|60%|70%|80%|90%|100%):   1.8|    2|  1.9|    2|  1.6|  1.6|    1
EF*(40%|50%|60%|70%|80%|90%|100%):   1.9|  2.1|    2|  2.1|  1.6|  1.6|  1.1
EF'(40%|50%|60%|70%|80%|90%|100%):   2.3|  2.2|  2.2|  2.2|  2.1|    2|  1.6
FOD(40%|50%|60%|70%|80%|90%|100%):   0.1|  0.1|  0.1|  0.2|  0.2|  0.2|  0.3

Copyright Schrodinger, LLC. All rights reserved.
"""

import os
import sys
from collections import defaultdict

from decorator import decorator

import schrodinger.application.canvas.fingerprint as canvas_fp
import schrodinger.application.canvas.similarity as canvas_sim
import schrodinger.structure as structure
import schrodinger.utils.fileutils as fileutils
import schrodinger.utils.log as log
from schrodinger.analysis.enrichment import enrichment_input
from schrodinger.analysis.enrichment import metrics
from schrodinger.analysis.enrichment import plotter
from schrodinger.application.glide.utils import is_valid_pv_file

CSV_FILE_EXT = ('.csv', '.CSV')
ST_TITLE_HEADER = "s_m_title"

logger = log.get_output_logger(__file__)


@decorator
def _exception_handler(func, *args, **kwargs):
    """
    A decorator to handle exceptions bubbling from functions in metrics.py.

    """
    # special_functions stores a list of Calculator class function names as
    # keys. These functions return multiple values, thus need to be
    # handled individually to ensure the conformity of return values.
    special_functions = {
        "calcBEDROC": (None, None),
    }
    try:
        return func(*args, **kwargs)
    except (ValueError, ZeroDivisionError, OverflowError, AttributeError) as e:
        func_name = func.__name__
        logger.debug(f"{func_name} caught {type(e).__name__}: {e}")
        logger.debug("Setting output as None.")
        return special_functions.get(func_name, None)


###############################################################################
# Classes
###############################################################################


[docs]class Calculator:
    """
    A class to report default set of enrichment terms for a screen.
    By default, a report containing a suite of metrics is directed to
    standard out.

    :note: This is not the preferred way to obtain enrichment metrics.
           Please consider using parser and metric functions directly in
           enrichment_input.py and metrics.py if possible.

    :cvar ef_precision: Number of decimals when reporting EF values.
                        Default = 2
    :vartype ef_precision: int

    :cvar efp_precision: Number of decimals when reporting EF' values.
                         Default = 2
    :vartype efp_precision: int

    :cvar efs_precision: Number if decimals when reporting EF* values.
                         Default = 2
    :vartype efs_precision: int

    :cvar eff_precision: Number of decimals when reporting Eff values.
                         Default = 3
    :vartype eff_precision: int

    :cvar fod_precision: Number of decimals when reporting FOD values.
                         Default = 1
    :vartype fod_precision: int

    """

    # Default reporting formats with %g idiom.
    ef_precision = 2
    efs_precision = 2
    efp_precision = 2
    eff_precision = 3
    fod_precision = 1

[docs]    def __init__(self, actives, results, total_decoys=0):
        """
        :param actives: File name or a list of strings containing all active
                        titles. If a file name is provided, the input should be
                        a valid csv or structure file, a raw text file
                        containing one line per title is also acceptable.
                        Duplicate titles are discarded, only the first
                        occurrence is recorded.
        :type actives: str or list(str)

        :param results: File name, a list of strings, or a list of
                        structure.Structure containing the virtual screening
                        result ordered by the scoring metric. If a file name is
                        provided, the input should be a valid csv file or
                        structure file. Duplicate titles are discarded, only the
                        first occurrence is recorded.
        :type results: str or list(str) or list(structure.Structure)

        :param total_decoys: The total number of decoys. If specified, the total
                             number of ligands will be distinct active titles
                             from actives file + num_decoy. This will enable the
                             calculation of the correction term in calc_AUAC,
                             should the total number of ligands not equal to the
                             total number of ranked titles in results_file.
        :type total_decoys: int

        """

        # Used for default enrichment report
        self.percentage_active = (0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
        self.percentage_ligand = (0.01, 0.02, 0.05, 0.10, 0.20)

        # Values used in fingerprint components, uses in report()
        self.min_Tc_total_actives = None
        self.missing_active_titles = None

        # Parse the actives file/list
        self.active_titles = self._parseActives(actives)

        # Parse the results file/list
        self._parseResults(results, total_decoys)

        # Shortcut for self.report()
        if isinstance(actives, str):
            self.actives = os.path.basename(actives)
        else:
            self.actives = "List of active titles"
        if isinstance(results, str):
            self.results = os.path.basename(results)
        else:
            self.results = "List of ranked ligand titles"
        self.total_decoys = self.total_ligands - self.total_actives

        # Calculate all of the default metrics
        self._calculateMetrics()

    def __repr__(self):
        rpr = "EnrichmentReporter(%s, %s, %d)" % (
            self.actives,
            self.results,
            int(self.total_decoys)  # Cast back to input integer.
        )
        return rpr

    def _parseActives(self, actives_file):
        """
        parse actives_file, return distinct active titles from the actives file.
        Repeated active titles are ignored.

        :param actives_file: File name or a list of strings containing all
                             active titles. If a file name is provided, the
                             input should be a valid csv or structure file,
                             a raw text file containing one line per title
                             is also acceptable. Duplicate titles are
                             discarded, only the first occurrence is
                             recorded.
        :type actives_file: str or list(str)

        :return: distinct active titles from the actives file.
        :rtype: set(str)

        """

        if isinstance(actives_file, str):
            _, ext = fileutils.splitext(actives_file)
            # csv file
            if ext in CSV_FILE_EXT:
                active_titles = \
                    enrichment_input.extract_active_titles_from_csv(
                        actives_file)
            # structure file
            elif fileutils.get_structure_file_format(actives_file):
                active_titles = \
                    enrichment_input.extract_active_titles_from_mae(
                        actives_file)
            # assume input is a plain text file otherwise
            else:
                active_titles = \
                    enrichment_input.extract_active_titles_from_txt(
                        actives_file)
        # Actives input is a sequence of structure titles.
        elif hasattr(actives_file, '__iter__'):
            if not actives_file:
                raise TypeError("Actives list must not be empty.")
            if isinstance(actives_file[0], str):
                active_titles = \
                    enrichment_input.extract_active_titles_from_list(
                        actives_file)
            else:
                raise TypeError("Actives list must be a list of strings.")
        else:
            raise TypeError("Actives input type must be a string or a list.")

        return active_titles

    def _parseResults(self, results_file, num_decoy):
        """
        Identify the file type of results_file and call the correct parser
        function to set rank and count related class variables.

        :param results_file: File name, a list of strings, or a list of
                             structure.Structure containing the virtual
                             screening result ordered by the scoring metric.
                             If a file name is provided, the input should be a
                             valid csv file or structure file. Duplicate titles
                             are discarded, only the first occurrence is
                             recorded.
        :type results_file: str or list(str) or list(structure.Structure)

        :param num_decoy: The total number of decoys. If specified, the total
                          number of ligands will be distinct active titles
                          from actives file + num_decoy. This will enable the
                          calculation of the correction term in calc_AUAC,
                          should the total number of ligands not equal to the
                          total number of ranked titles in results_file.
        :type num_decoy: int

        """

        if isinstance(results_file, str):
            _, ext = fileutils.splitext(results_file)
            # csv file
            if ext in CSV_FILE_EXT:
                self.total_actives, self.total_ligands, self.active_ranks, \
                    self.adjusted_active_ranks, self.total_ranked, \
                    self.title_ranks = \
                        enrichment_input.extract_ranks_from_csv(
                            csv_file_name=results_file,
                            active_titles=self.active_titles,
                            num_decoy=num_decoy)
            # structure file
            elif fileutils.get_structure_file_format(results_file):
                self.total_actives, self.total_ligands, self.active_ranks, \
                    self.adjusted_active_ranks, self.total_ranked, \
                    self.title_ranks = \
                        enrichment_input.extract_ranks_from_mae(
                            mae_file_name=results_file,
                            active_titles=self.active_titles,
                            num_decoy=num_decoy)
                self._getFingerprintComponent(results_file)
            else:
                msg = "Results must be a csv file or a maestro file."
                raise TypeError(msg)
        # Actives input is a sequence of structure titles.
        elif hasattr(results_file, '__iter__'):
            if not results_file:
                raise TypeError("Results list must not be empty.")
            # list of strings
            if isinstance(results_file[0], str):
                self.total_actives, self.total_ligands, self.active_ranks, \
                    self.adjusted_active_ranks, self.total_ranked, \
                    self.title_ranks = \
                        enrichment_input.extract_ranks_from_list(
                            titles_iter=results_file,
                            active_titles=self.active_titles,
                            num_decoy=num_decoy)
            # list of structure.Structure
            elif isinstance(results_file[0], structure.Structure):
                self.total_actives, self.total_ligands, self.active_ranks, \
                    self.adjusted_active_ranks, self.total_ranked, \
                    self.title_ranks = \
                        enrichment_input.extract_ranks_from_structures(
                            structure_iter=results_file,
                            active_titles=self.active_titles,
                            num_decoy=num_decoy)
                self._getFingerprintComponent(results_file)
            else:
                msg = "Results list must be a list of strings " \
                      "or structure.Structure."
                raise TypeError(msg)
        else:
            raise TypeError("Results input type must be a string or a list.")

    def _getFingerprintComponent(self, results_file):
        """
        Initialize and return a data class object needed for fingerprint-related
        calculations. This is basically enrichment_input.getFingerprintComponent
        but we'll also need to find the missing active titles for the reporter.

        :param results_file: File name, a list of strings, or a list of
                             structure.Structure containing the virtual
                             screening result ordered by the scoring metric.
                             If a file name is provided, the input should be a
                             valid csv file or structure file. Duplicate titles
                             are discarded, only the first occurrence is
                             recorded.
        :type results_file: str or list(str) or list(structure.Structure)

        """

        fp_gen = canvas_fp.CanvasFingerprintGenerator(logger=logger)
        fp_gen.setType('Linear')
        fp_gen.setAtomBondTyping(fp_gen.getDefaultAtomTypingScheme())
        fp_sim = canvas_sim.CanvasFingerprintSimilarity(logger=logger)
        fp_sim.setMetric('Tanimoto')

        # Bind initial values
        title_ranks = {}
        hits_titles = set()
        active_fingerprint = {}

        if isinstance(results_file, str):
            # Assume Glide results are typical, skip the receptor in pv files
            file_index = 2 if is_valid_pv_file(results_file) else 1
            for st in structure.StructureReader(results_file, file_index):
                title = st.property[ST_TITLE_HEADER]
                enrichment_input._fingerprint_gen(st, title, self.active_titles,
                                                  fp_gen, hits_titles,
                                                  title_ranks,
                                                  active_fingerprint)
        elif hasattr(results_file, '__iter__'):
            for st in results_file:
                title = st.property[ST_TITLE_HEADER]
                enrichment_input._fingerprint_gen(st, title, self.active_titles,
                                                  fp_gen, hits_titles,
                                                  title_ranks,
                                                  active_fingerprint)

        min_tc = enrichment_input._calc_min_tc_total_actives(
            fp_sim, active_fingerprint)

        self.min_Tc_total_actives = min_tc
        self.fingerprint_object = enrichment_input.FingerprintComponent(
            fp_gen, fp_sim, active_fingerprint, min_tc)

        self.missing_active_titles = \
            [title for title in self.active_titles if title not in hits_titles]

    ###################
    # Metrics functions
    ###################

    def _getActiveSampleSizeStar(self, fraction_of_actives):
        return metrics.get_active_sample_size_star(
            total_actives=self.total_actives,
            total_ligands=self.total_ligands,
            adjusted_active_ranks=self.adjusted_active_ranks,
            fraction_of_actives=fraction_of_actives)

    def _getActiveSampleSize(self, fraction_of_actives):
        return metrics.get_active_sample_size(
            total_actives=self.total_actives,
            total_ligands=self.total_ligands,
            active_ranks=self.active_ranks,
            fraction_of_actives=fraction_of_actives)

    def _getDecoySampleSize(self, fraction_of_decoys):
        return metrics.get_decoy_sample_size(
            total_actives=self.total_actives,
            total_ligands=self.total_ligands,
            active_ranks=self.active_ranks,
            fraction_of_decoys=fraction_of_decoys)

[docs]    @_exception_handler
    def calcEF(self, n_sampled_set, min_actives=None):
        return metrics.calc_EF(total_actives=self.total_actives,
                               total_ligands=self.total_ligands,
                               active_ranks=self.active_ranks,
                               n_sampled_set=n_sampled_set,
                               min_actives=min_actives)

[docs]    @_exception_handler
    def calcEFStar(self, n_sampled_decoy_set, min_actives=None):
        return metrics.calc_EFStar(total_actives=self.total_actives,
                                   total_ligands=self.total_ligands,
                                   active_ranks=self.active_ranks,
                                   n_sampled_decoy_set=n_sampled_decoy_set,
                                   min_actives=min_actives)

[docs]    @_exception_handler
    def calcEFP(self, n_sampled_decoy_set, min_actives=None):
        return metrics.calc_EFP(total_actives=self.total_actives,
                                total_ligands=self.total_ligands,
                                active_ranks=self.active_ranks,
                                n_sampled_decoy_set=n_sampled_decoy_set,
                                min_actives=min_actives)

[docs]    @_exception_handler
    def calcFOD(self, fraction_of_actives):
        return metrics.calc_FOD(total_actives=self.total_actives,
                                total_ligands=self.total_ligands,
                                active_ranks=self.active_ranks,
                                fraction_of_actives=fraction_of_actives)

[docs]    @_exception_handler
    def calcEFF(self, fraction_of_decoys):
        return metrics.calc_EFF(
            total_actives=self.total_actives,
            total_ligands=self.total_ligands,
            adjusted_active_ranks=self.adjusted_active_ranks,
            fraction_of_decoys=fraction_of_decoys)

[docs]    def calcActivesInN(self, n_sampled_set):
        return metrics.calc_ActivesInN(active_ranks=self.active_ranks,
                                       n_sampled_set=n_sampled_set)

[docs]    def calcActivesInNStar(self, n_sampled_set):
        return metrics.calc_ActivesInNStar(
            adjusted_active_ranks=self.adjusted_active_ranks,
            n_sampled_set=n_sampled_set)

[docs]    def calcAveNumberOutrankingDecoys(self):
        return metrics.calc_AveNumberOutrankingDecoys(
            active_ranks=self.active_ranks)

[docs]    @_exception_handler
    def calcBEDROC(self, alpha=20.0):
        return metrics.calc_BEDROC(total_actives=self.total_actives,
                                   total_ligands=self.total_ligands,
                                   active_ranks=self.active_ranks,
                                   alpha=alpha)

[docs]    @_exception_handler
    def calcRIE(self, alpha=20.0):
        return metrics.calc_RIE(total_actives=self.total_actives,
                                total_ligands=self.total_ligands,
                                active_ranks=self.active_ranks,
                                alpha=alpha)

[docs]    @_exception_handler
    def calcAUAC(self):
        return metrics.calc_AUAC(total_actives=self.total_actives,
                                 total_ligands=self.total_ligands,
                                 total_ranked=self.total_ranked,
                                 active_ranks=self.active_ranks)

[docs]    @_exception_handler
    def calcROC(self):
        return metrics.calc_ROC(
            total_actives=self.total_actives,
            total_ligands=self.total_ligands,
            adjusted_active_ranks=self.adjusted_active_ranks)

[docs]    @_exception_handler
    def calcDEF(self, n_sampled_set, min_actives=None):
        return metrics.calc_DEF(total_actives=self.total_actives,
                                total_ligands=self.total_ligands,
                                active_ranks=self.active_ranks,
                                title_ranks=self.title_ranks,
                                fingerprint_comp=self.fingerprint_object,
                                n_sampled_set=n_sampled_set,
                                min_actives=min_actives)

[docs]    @_exception_handler
    def calcDEFStar(self, n_sampled_decoy_set, min_actives=None):
        return metrics.calc_DEFStar(total_actives=self.total_actives,
                                    total_ligands=self.total_ligands,
                                    active_ranks=self.active_ranks,
                                    title_ranks=self.title_ranks,
                                    fingerprint_comp=self.fingerprint_object,
                                    n_sampled_decoy_set=n_sampled_decoy_set,
                                    min_actives=min_actives)

[docs]    @_exception_handler
    def calcDEFP(self, n_sampled_decoy_set, min_actives=None):
        return metrics.calc_DEFP(total_actives=self.total_actives,
                                 total_ligands=self.total_ligands,
                                 active_ranks=self.active_ranks,
                                 title_ranks=self.title_ranks,
                                 fingerprint_comp=self.fingerprint_object,
                                 n_sampled_decoy_set=n_sampled_decoy_set,
                                 min_actives=min_actives)

    ####################
    # Plotter functions
    ####################

[docs]    def calculateSpecificity(self, rank):
        return plotter.calc_specificity_with_rank(
            total_actives=self.total_actives,
            total_ligands=self.total_ligands,
            active_ranks=self.active_ranks,
            rank=rank)

[docs]    def getPercentScreenCurvePoints(self):
        return plotter.get_percent_screen_curve_points(
            total_actives=self.total_actives,
            total_ligands=self.total_ligands,
            active_ranks=self.active_ranks)

[docs]    def getActiveRankCsvRows(self):
        return plotter.get_plot_data(total_actives=self.total_actives,
                                     total_ligands=self.total_ligands,
                                     active_ranks=self.active_ranks,
                                     title_ranks=self.title_ranks)

[docs]    def savePlot(self,
                 png_file="plot.png",
                 title='Screen Results',
                 xlabel='1-Specificity',
                 ylabel='Sensitivity'):
        return plotter.save_plot(
            total_actives=self.total_actives,
            total_ligands=self.total_ligands,
            active_ranks=self.active_ranks,
            adjusted_active_ranks=self.adjusted_active_ranks,
            total_ranked=self.total_ranked,
            png_file=png_file,
            title=title,
            xlabel=xlabel,
            ylabel=ylabel)

    ####################
    # Reporter functions
    ####################

[docs]    @staticmethod
    def format(value, precision=2):
        """
        :param value: Float value to format as string.
        :type value: float or None

        :param precision: Number of digits after the decimal.
        :type precision: int

        :return: a string representation of the passed value.  If the value is
                 None then the returned string is 'n/a'.  Uses %g formatting
                 idiom so large values are returned as exponentials.
        :rtype: str

        """

        str_value = 'n/a' if value is None else "%.*g" % (precision, value)

        return str_value

    def _calculateMetrics(self):
        """
        Sets a suite of enrichment factor terms as instance data members.

        """

        active_sample_size_list = [
            self._getActiveSampleSize(e) for e in self.percentage_active
        ]
        active_sample_size_list_star = [
            self._getActiveSampleSizeStar(e) for e in self.percentage_active
        ]
        m_active_size_list = [
            round(self.total_actives * e) for e in self.percentage_active
        ]
        ligand_sample_size_list = [
            self._getDecoySampleSize(e) for e in self.percentage_ligand
        ]
        m_ligand_size_list = [
            round(self.total_ligands * e) for e in self.percentage_ligand
        ]

        self.ave_num_outranking_decoys = self.calcAveNumberOutrankingDecoys()
        self.bedroc20, self.alphaRa20 = self.calcBEDROC(alpha=20.0)
        self.bedroc160_9, self.alphaRa160_9 = self.calcBEDROC(alpha=160.9)
        self.bedroc8_0, self.alphaRa8_0 = self.calcBEDROC(alpha=8.0)
        self.roc = self.calcROC()
        self.rie = self.calcRIE()
        self.auac = self.calcAUAC()

        for pct, n_sampled_set, n_sampled_decoy_set, min_actives in zip(
                self.percentage_active, active_sample_size_list,
                active_sample_size_list_star, m_active_size_list):
            ef_name = f'ef_{int(pct * 100)}'
            ef_value = self.calcEF(n_sampled_set, min_actives)
            setattr(self, ef_name, ef_value)

            efs_name = f'efs_{int(pct * 100)}'
            efs_value = self.calcEFStar(n_sampled_decoy_set, min_actives)
            setattr(self, efs_name, efs_value)

            efp_name = f'efp_{int(pct * 100)}'
            efp_value = self.calcEFP(n_sampled_decoy_set, min_actives)
            setattr(self, efp_name, efp_value)

            fod_stat_name = f"fod_{int(pct * 100)}"
            fod_stat_value = self.calcFOD(pct)
            setattr(self, fod_stat_name, fod_stat_value)

        for pct, n_sampled_set, n_sampled_decoy_set in zip(
                self.percentage_ligand, m_ligand_size_list,
                ligand_sample_size_list):
            stat_name = f"actives_in_top_{int(pct * 100)}_pct"
            stat_value = self.calcActivesInN(self.total_ligands * pct)
            setattr(self, stat_name, stat_value)

            pct_stat_name = f"pct_actives_in_top_{int(pct * 100)}_pct"
            pct_stat_value = (100.0 * stat_value / self.total_actives)
            setattr(self, pct_stat_name, pct_stat_value)

            stat_name_star = f"actives_in_top_{int(pct * 100)}_pct_star"
            stat_value_star = self.calcActivesInNStar(self.total_decoys * pct)
            setattr(self, stat_name_star, stat_value_star)

            pct_stat_name_star = f"pct_actives_in_top_{int(pct * 100)}_pct_star"
            pct_stat_value_star = (100.0 * stat_value_star / self.total_actives)
            setattr(self, pct_stat_name_star, pct_stat_value_star)

            ef_pct_name = f"ef_{int(pct * 100)}pct"
            ef_pct_value = self.calcEF(n_sampled_set)
            setattr(self, ef_pct_name, ef_pct_value)

            efs_pct_name = f"efs_{int(pct * 100)}pct"
            efs_pct_value = self.calcEFStar(n_sampled_decoy_set)
            setattr(self, efs_pct_name, efs_pct_value)

            efp_pct_name = f"efp_{int(pct * 100)}pct"
            efp_pct_value = self.calcEFP(n_sampled_decoy_set)
            setattr(self, efp_pct_name, efp_pct_value)

            eff_stat_name = f"eff_{int(pct * 100)}pct"
            eff_stat_value = self.calcEFF(pct)
            setattr(self, eff_stat_name, eff_stat_value)

            def_name = f"def_{int(pct * 100)}pct"
            def_value = self.calcDEF(n_sampled_set)
            setattr(self, def_name, def_value)

            defs_name = f"defs_{int(pct * 100)}pct"
            defs_value = self.calcDEFStar(n_sampled_decoy_set)
            setattr(self, defs_name, defs_value)

            defp_name = f"defp_{int(pct * 100)}pct"
            defp_value = self.calcDEFP(n_sampled_decoy_set)
            setattr(self, defp_name, defp_value)

            # Assign synomym attributes so clients of the deprecated
            # glideanalysis.Enrichment class are less affected by the change.
            old_ef_name = f"ef_{int(pct * 100)}pct"
            new_ef_name = f"efd_{int(pct * 100)}"
            new_ef_value = getattr(self, old_ef_name)
            setattr(self, new_ef_name, new_ef_value)

            old_ait_name = f"actives_in_top_{int(pct * 100)}_pct"
            new_ait_name = f"actives_in_top_{int(pct * 100)}_prcnt"
            new_ait_value = getattr(self, old_ait_name)
            setattr(self, new_ait_name, new_ait_value)

[docs]    def report(self, file_handle=sys.stdout, header="", footer=""):
        """
        Prints text summary of results to the file_handle.

        :param header: Header for the report.
        :type header: str

        :param footer: Footer for the report.
        :type footer: str

        :param file_handle: File handle-like object, default is sys.stdout.
        :type file_handle: file

        """

        out_fh = file_handle

        results = self.results
        if not isinstance(results, str):
            results = 'Structure list (%d items)' % len(self.results)

        line = '=' * 80
        out_fh.write(line)
        out_fh.write('\n')
        line = """
Enrichment Report
-----------------
%s
Actives file: %s
Results: %s
Total actives: %d
Total ligands(actives+decoys): %d
Number of ranked actives: %d

BEDROC(alpha=160.9, alpha*Ra=%.4f): %.3f
BEDROC(alpha=20.0, alpha*Ra=%.4f): %.3f
BEDROC(alpha=8.0, alpha*Ra=%.4f): %.3f
ROC: %.2f
RIE: %.2f
Area under accumulation curve: %.2f
Ave. Number of outranking decoys: %d
Minimum Tc over all active pairs: %s
        """ % (header, self.actives, results, self.total_actives,
               self.total_ligands, len(
                   self.active_ranks), self.alphaRa160_9, self.bedroc160_9,
               self.alphaRa20, self.bedroc20, self.alphaRa8_0, self.bedroc8_0,
               self.roc, self.rie, self.auac, self.ave_num_outranking_decoys,
               self.format(self.min_Tc_total_actives, 2))
        out_fh.write(line)

        line = """
Count and percentage of actives in top N%% of decoy results.
%% Decoys  |%s|%s|%s|%s|%s|
# Actives |%s|%s|%s|%s|%s|
%% Actives |%s|%s|%s|%s|%s|
        """ % ("%5s" % "1%", "%5s" % "2%", "%5s" % "5%", "%5s" % "10%",
               "%5s" % "20%", "%5d" % self.actives_in_top_1_pct_star,
               "%5d" % self.actives_in_top_2_pct_star,
               "%5d" % self.actives_in_top_5_pct_star,
               "%5d" % self.actives_in_top_10_pct_star,
               "%5d" % self.actives_in_top_20_pct_star,
               "%5.1f" % self.pct_actives_in_top_1_pct_star,
               "%5.1f" % self.pct_actives_in_top_2_pct_star,
               "%5.1f" % self.pct_actives_in_top_5_pct_star,
               "%5.1f" % self.pct_actives_in_top_10_pct_star,
               "%5.1f" % self.pct_actives_in_top_20_pct_star)
        out_fh.write(line)

        line = """
Count and percentage of actives in top N%% of results.
%% Results |%s|%s|%s|%s|%s|
# Actives |%s|%s|%s|%s|%s|
%% Actives |%s|%s|%s|%s|%s|
        """ % ("%5s" % "1%", "%5s" % "2%", "%5s" % "5%", "%5s" % "10%",
               "%5s" % "20%", "%5d" % self.actives_in_top_1_pct, "%5d" %
               self.actives_in_top_2_pct, "%5d" % self.actives_in_top_5_pct,
               "%5d" % self.actives_in_top_10_pct,
               "%5d" % self.actives_in_top_20_pct,
               "%5.1f" % self.pct_actives_in_top_1_pct,
               "%5.1f" % self.pct_actives_in_top_2_pct,
               "%5.1f" % self.pct_actives_in_top_5_pct,
               "%5.1f" % self.pct_actives_in_top_10_pct,
               "%5.1f" % self.pct_actives_in_top_20_pct)
        out_fh.write(line)

        line = """
Enrichment Factors with respect to N%% sample size.
%% Sample |%s|%s|%s|%s|%s|
EF       |%s|%s|%s|%s|%s|
EF*      |%s|%s|%s|%s|%s|
EF'      |%s|%s|%s|%s|%s|
DEF      |%s|%s|%s|%s|%s|
DEF*     |%s|%s|%s|%s|%s|
DEF'     |%s|%s|%s|%s|%s|
Eff      |%s|%s|%s|%s|%s|
        """ % (
            "%7s" % "1%",
            "%7s" % "2%",
            "%7s" % "5%",
            "%7s" % "10%",
            "%7s" % "20%",
            "%7s" % self.format(self.ef_1pct, self.ef_precision),
            "%7s" % self.format(self.ef_2pct, self.ef_precision),
            "%7s" % self.format(self.ef_5pct, self.ef_precision),
            "%7s" % self.format(self.ef_10pct, self.ef_precision),
            "%7s" % self.format(self.ef_20pct, self.ef_precision),
            "%7s" % self.format(self.efs_1pct, self.efs_precision),
            "%7s" % self.format(self.efs_2pct, self.efs_precision),
            "%7s" % self.format(self.efs_5pct, self.efs_precision),
            "%7s" % self.format(self.efs_10pct, self.efs_precision),
            "%7s" % self.format(self.efs_20pct, self.efs_precision),
            "%7s" % self.format(self.efp_1pct, self.efp_precision),
            "%7s" % self.format(self.efp_2pct, self.efp_precision),
            "%7s" % self.format(self.efp_5pct, self.efp_precision),
            "%7s" % self.format(self.efp_10pct, self.efp_precision),
            "%7s" % self.format(self.efp_20pct, self.efp_precision),
            "%7s" % self.format(self.def_1pct, self.ef_precision),
            "%7s" % self.format(self.def_2pct, self.ef_precision),
            "%7s" % self.format(self.def_5pct, self.ef_precision),
            "%7s" % self.format(self.def_10pct, self.ef_precision),
            "%7s" % self.format(self.def_20pct, self.ef_precision),
            "%7s" % self.format(self.defs_1pct, self.efs_precision),
            "%7s" % self.format(self.defs_2pct, self.efs_precision),
            "%7s" % self.format(self.defs_5pct, self.efs_precision),
            "%7s" % self.format(self.defs_10pct, self.efs_precision),
            "%7s" % self.format(self.defs_20pct, self.efs_precision),
            "%7s" % self.format(self.defp_1pct, self.efp_precision),
            "%7s" % self.format(self.defp_2pct, self.efp_precision),
            "%7s" % self.format(self.defp_5pct, self.efp_precision),
            "%7s" % self.format(self.defp_10pct, self.efp_precision),
            "%7s" % self.format(self.defp_20pct, self.efp_precision),
            "%7s" % self.format(self.eff_1pct, self.eff_precision),
            "%7s" % self.format(self.eff_2pct, self.eff_precision),
            "%7s" % self.format(self.eff_5pct, self.eff_precision),
            "%7s" % self.format(self.eff_10pct, self.eff_precision),
            "%7s" % self.format(self.eff_20pct, self.eff_precision),
        )
        out_fh.write(line)

        line = """
Enrichment Factors with respect to N%% actives recovered.
%% Actives |%s|%s|%s|%s|%s|%s|%s|
EF        |%s|%s|%s|%s|%s|%s|%s|
EF*       |%s|%s|%s|%s|%s|%s|%s|
EF'       |%s|%s|%s|%s|%s|%s|%s|
FOD       |%s|%s|%s|%s|%s|%s|%s|
        """ % (
            "%7s" % "40%",
            "%7s" % "50%",
            "%7s" % "60%",
            "%7s" % "70%",
            "%7s" % "80%",
            "%7s" % "90%",
            "%7s" % "100%",
            "%7s" % self.format(self.ef_40, self.ef_precision),
            "%7s" % self.format(self.ef_50, self.ef_precision),
            "%7s" % self.format(self.ef_60, self.ef_precision),
            "%7s" % self.format(self.ef_70, self.ef_precision),
            "%7s" % self.format(self.ef_80, self.ef_precision),
            "%7s" % self.format(self.ef_90, self.ef_precision),
            "%7s" % self.format(self.ef_100, self.ef_precision),
            "%7s" % self.format(self.efs_40, self.efs_precision),
            "%7s" % self.format(self.efs_50, self.efs_precision),
            "%7s" % self.format(self.efs_60, self.efs_precision),
            "%7s" % self.format(self.efs_70, self.efs_precision),
            "%7s" % self.format(self.efs_80, self.efs_precision),
            "%7s" % self.format(self.efs_90, self.efs_precision),
            "%7s" % self.format(self.efs_100, self.efs_precision),
            "%7s" % self.format(self.efp_40, self.efp_precision),
            "%7s" % self.format(self.efp_50, self.efp_precision),
            "%7s" % self.format(self.efp_60, self.efp_precision),
            "%7s" % self.format(self.efp_70, self.efp_precision),
            "%7s" % self.format(self.efp_80, self.efp_precision),
            "%7s" % self.format(self.efp_90, self.efp_precision),
            "%7s" % self.format(self.efp_100, self.efp_precision),
            "%7s" % self.format(self.fod_40, self.fod_precision),
            "%7s" % self.format(self.fod_50, self.fod_precision),
            "%7s" % self.format(self.fod_60, self.fod_precision),
            "%7s" % self.format(self.fod_70, self.fod_precision),
            "%7s" % self.format(self.fod_80, self.fod_precision),
            "%7s" % self.format(self.fod_90, self.fod_precision),
            "%7s" % self.format(self.fod_100, self.fod_precision),
        )
        out_fh.write(line)

        out_fh.write('\n')
        out_fh.write('%s %s\n' % ('Rank'.ljust(6), 'Title'.ljust(73)))
        out_fh.write('%s %s\n' % ('-' * 6, '-' * 73))
        for rank in sorted(self.title_ranks):
            title = self.title_ranks[rank]
            out_fh.write('%s %s\n' % (str(rank).ljust(6), title.ljust(73)))
        out_fh.write('\n')

        if self.missing_active_titles:
            out_fh.write("Missing ranks for:\n")
            out_fh.write("\n".join(self.missing_active_titles))
            out_fh.write('\n')

        out_fh.write(footer)
        out_fh.write('\n')

        line = '=' * 80
        out_fh.write(line)
        out_fh.write('\n')

[docs]    def getCsvRows(self):
        """
        Return a list of two lists, the first inner list contains all metric
        names, the other contains all corresponding metric values.

        :return: a list of header and enrichment value tuples.
        :rtype: list

        """

        percentage_active = [int(pct * 100) for pct in self.percentage_active]
        percentage_ligand = [int(pct * 100) for pct in self.percentage_ligand]
        dict_keys = ("actives_in_top_pct", "pct_actives_in_top_pct",
                     "actives_in_top_pct_star", "pct_actives_in_top_pct_star",
                     "ef_pct", "efs_pct", "efp_pct", "def_pct", "defs_pct",
                     "defp_pct", "eff_pct", "ef_actives", "efs_actives",
                     "efp_actives", "fod_actives")
        value_storage = defaultdict(list)

        for pct in percentage_ligand:
            value_storage[dict_keys[0]].append(
                (f"# Actives {pct}%", getattr(self,
                                              f"actives_in_top_{pct}_pct")))
            value_storage[dict_keys[1]].append(
                (f"% Actives {pct}%",
                 getattr(self, f"pct_actives_in_top_{pct}_pct")))
            value_storage[dict_keys[2]].append(
                (f"# Actives {pct}%*",
                 getattr(self, f"actives_in_top_{pct}_pct_star")))
            value_storage[dict_keys[3]].append(
                (f"% Actives {pct}%*",
                 getattr(self, f"pct_actives_in_top_{pct}_pct_star")))
            value_storage[dict_keys[4]].append(
                (f"EF {pct}%",
                 self.format(getattr(self, f"ef_{pct}pct"), self.ef_precision)))
            value_storage[dict_keys[5]].append(
                (f"EF* {pct}%",
                 self.format(getattr(self, f"efs_{pct}pct"),
                             self.efs_precision)))
            value_storage[dict_keys[6]].append(
                (f"EF' {pct}%",
                 self.format(getattr(self, f"efp_{pct}pct"),
                             self.efp_precision)))
            value_storage[dict_keys[7]].append(
                (f"DEF {pct}%",
                 self.format(getattr(self, f"def_{pct}pct"),
                             self.ef_precision)))
            value_storage[dict_keys[8]].append(
                (f"DEF* {pct}%",
                 self.format(getattr(self, f"defs_{pct}pct"),
                             self.efs_precision)))
            value_storage[dict_keys[9]].append(
                (f"DEF' {pct}%",
                 self.format(getattr(self, f"defp_{pct}pct"),
                             self.efp_precision)))
            value_storage[dict_keys[10]].append(
                (f"Eff {pct}%",
                 self.format(getattr(self, f"eff_{pct}pct"),
                             self.eff_precision)))

        for pct in percentage_active:
            value_storage[dict_keys[11]].append(
                (f"EF {pct}% actives",
                 self.format(getattr(self, f"ef_{pct}"), self.ef_precision)))
            value_storage[dict_keys[12]].append(
                (f"EF* {pct}% actives",
                 self.format(getattr(self, f"efs_{pct}"), self.efs_precision)))
            value_storage[dict_keys[13]].append(
                (f"EF' {pct}% actives",
                 self.format(getattr(self, f"efp_{pct}"), self.efp_precision)))
            value_storage[dict_keys[14]].append(
                (f"FOD {pct}% actives",
                 self.format(getattr(self, f"fod_{pct}"), self.fod_precision)))

        # yapf: disable
        enrich_data = [('Actives file', self.actives),
                       ('Results file', self.results),
                       ('Total ligands(actives+decoys)', self.total_ligands),
                       ('Total actives', self.total_actives),
                       ('Number of ranked actives', len(self.active_ranks)),
                       ('BEDROC(alpha=160.9)', self.bedroc160_9),
                       ('BEDROC(alpha=160.9) alpha*Ra', self.alphaRa160_9),
                       ('BEDROC(alpha=20.0)', self.bedroc20),
                       ('BEDROC(alpha=20.0) alpha*Ra', self.alphaRa20),
                       ('BEDROC(alpha=8.0)', self.bedroc8_0),
                       ('BEDROC(alpha=8.0) alpha*Ra', self.alphaRa8_0),
                       ('ROC', self.roc),
                       ('RIE', self.rie),
                       ('Area under accumulation curve', self.auac),
                       ('Ave. Num. of outranking decoys',
                            self.ave_num_outranking_decoys)]
        # yapf: enable

        for k in dict_keys:
            enrich_data.extend(value_storage[k])

        rows = []
        rows.append([item[0] for item in enrich_data])
        rows.append([item[1] for item in enrich_data])
        return rows