Source code for schrodinger.application.bioluminate.protein

"""
Module to gather residue property data for proteins.

Copyright (c) Schrodinger, LLC. All rights reserved
"""

# Contributors: Joshua Williams

import errno
import itertools
import os
import re
from collections import OrderedDict
from past.utils import old_div
from shutil import rmtree

import numpy

from schrodinger import structure
from schrodinger.application.bioluminate import surfcomp
from schrodinger.application.prime import input as prime_input
from schrodinger.infra import mm
from schrodinger.infra.mm import MmException
from schrodinger.job import jobcontrol
from schrodinger.job.util import hunt
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils import minimize
from schrodinger.utils import fileutils

_version = '$Revision: 0.0 $'

#- Imports -------------------------------------------------------------------

try:
    from schrodinger.application.prime.packages.multiloopbuilder import \
        MultiLoopBuilder
except:
    MultiLoopBuilder = None

try:
    from . import propka_parse
    from . import run_propka
except:
    propka_parse = None
    run_propka = None

#- Globals -------------------------------------------------------------------

# Used to define whether an atom is nonpolar (see atom_is_nonpolar function)
NONPOLAR_HEAVY_ELEMENTS = ['C', 'S']

ALL_RESIDUES = [
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'HIP', 'HID',
    'HIE', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR',
    'VAL'
]

# NOTE: Why are ARN, ASH, GLH, LYN not in the above list, while HIP/HID/HIE is?

SUPPORTED_RESIDUES = ALL_RESIDUES + ['ARN', 'ASH', 'GLH', 'LYN']

AA_3_LETTER_MAP = {
    resname: structure.RESIDUE_MAP_3_TO_1_LETTER[resname]
    for resname in SUPPORTED_RESIDUES
}

# Mapping of residue names from 1-letter to the "most standard" 3-letter name:
# This dict is needed to support 1-letter names in the user's input files.
AA_1_LETTER_MAP = structure.RESIDUE_MAP_1_TO_3_LETTER

# Removed unless we support: 'ARN', 'ASH', 'GLH', 'LYN'
POLAR_RESIDUES = [
    'ARG', 'ASP', 'GLU', 'HIS', 'ASN', 'GLN', 'LYS', 'SER', 'THR', 'TYR', 'HID',
    'HIE', 'HIP'
]

# Removed unless we support: 'CYX'
NONPOLAR_RESIDUES = [
    'PHE', 'LEU', 'ILE', 'TRP', 'VAL', 'MET', 'PRO', 'CYS', 'ALA'
]

# Separate list that includes non-polar residues which are not always
# applicable/meaningful in UI context (used only for SASA at the moment).
SASA_NONPOLAR_RESIDUES = NONPOLAR_RESIDUES + ['CYT']

# Removed unless we support: 'TYO'
AROMATIC_RESIDUES = ['PHE', 'TYR', 'TRP', 'HIS', 'HIE', 'HIP']

BASIC_RESIDUES = ['ARG', 'LYS', 'HIS', 'HIP']
ACIDIC_RESIDUES = ['ASP', 'GLU']
CHARGED_RESIDUES = BASIC_RESIDUES + ACIDIC_RESIDUES

# Removed unless we support: 'TYO', 'ASH', 'GLH', 'HIE'
NEUTRAL_RESIDUES = [
    'ALA', 'ASN', 'CYS', 'GLN', 'GLY', 'HID', 'HIE', 'ILE', 'LEU', 'MET', 'PHE',
    'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'
]

#TODO: Push to external JSON file
# Taken from component_interactions.py (Shawn Watts)
RESIDUE_ROTOR_COUNT = {
    'ALA': 0,
    'ARG': 4,  # Excludes C_epsilon N_zeta NH3 rotor.
    'ARN': 4,
    'ASN': 2,
    'ASH': 2,
    'ASP': 2,
    'CYS': 2,
    'GLN': 3,
    'GLU': 3,
    'GLH': 3,
    'GLY': 0,
    'HIS': 2,
    'HID': 2,
    'HIP': 2,
    'HIE': 2,
    'ILE': 2,  # Excludes C_gamma and C_delta trivial rotors.
    'LEU': 2,  # Excludes C_delta trivial rotor.
    'LYN': 4,  # Excludes C-NH3 rotor.
    'LYS': 4,  # Excludes C-NH3 rotor.
    'MET': 3,  # Excludes S-CH3 rotor.
    'PHE': 2,
    'PRO': 0,
    'SER': 2,  # Includes C-OH rotor.
    'THR': 2,  # Includes C-OH rotor, excludes C_gamma trivial rotor.
    'TRP': 2,
    'TYR': 3,  # Includes Phenol OH rotor.
    'VAL': 1,  # Excludes C_gamma trivial rotors.
}

#TODO: Push to external JSON file
# Taken from: J Mol Biol 157:105, 1982.
HYDROPATHY_SCALE = {
    'ALA': 1.8,
    'ARG': -4.5,
    'ARN': -4.5,  # Neutral-Arginine (Maestro-specific)
    'ASH': -3.5,  # Protonated Aspartic (Maestro-specific)
    'ASN': -3.5,  # Asparagine
    'ASP': -3.5,  # Aspartic acid
    'CYS': 2.5,
    'CYX': 2.5,  # Cysteine in sulphide linkage
    'CYT': 2.5,  # Ionised Cysteine (Maestro-specific)
    'GLH': -3.5,  # (Maestro-specific)
    'GLN': -3.5,
    'GLU': -3.5,
    'GLY': -0.4,
    'HIS': -3.2,
    'HID': -3.2,  # Histidine (Maestro-specific)
    'HIE': -3.2,  # Histidine (Maestro-specific)
    'HIP': -3.2,  # Histidine (Maestro-specific)
    'ILE': 4.5,
    'LEU': 3.8,
    'LYN': -3.9,  # Protonated Lysine (Maestro-specific)
    'LYS': -3.9,
    'MET': 1.9,
    'PHE': 2.8,
    'PRO': -1.6,
    'SER': -0.8,
    'SRO': -0.8,  # Ionized Serine (Maestro-specific)
    'SEP': -0.8,  # Phosphorylated serine (Maestro-specific)
    'THR': -0.7,
    'THO': -0.7,  # Ionized Threonine (Maestro-specific)
    'TPO': -0.7,  # Phosporylated Threonine (Maestro-specific)
    'TRP': -0.9,
    'TYR': -1.3,
    'TYO': -1.3,  # Ionized Tyrosine (Maestro-specific)
    'PTR': -1.3,  # Phosphorylated tyrosine (Maestro-specific)
    'VAL': 4.2,
}

CHAIN_NAME_REGEX = '(?P<chain>[a-zA-Z0-9_]{1})'

MUTATION_1_LETTER_RE = re.compile(
    CHAIN_NAME_REGEX + r"""
    :
    (?P<resnum>-?\d+)
    (?P<inscode>[a-zA-Z]{1})?  # optional
    \s?                         # optional
    (?P<mutations>[%s]+)?       # optional
    """ % (''.join(list(AA_1_LETTER_MAP))), re.VERBOSE)
MUTATION_3_LETTER_RE_BASE = CHAIN_NAME_REGEX + r"""
    :
    (?P<resnum>-?\d+)
    (?P<inscode>[a-zA-Z]{1})?  # optional
    \s?                        # optional
    (?P<mutations>(%s)(,%s)*)? # optional
    """
MUTATION_3_LETTER_RE = re.compile(
    MUTATION_3_LETTER_RE_BASE %
    ('|'.join(SUPPORTED_RESIDUES), '|,'.join(SUPPORTED_RESIDUES)), re.VERBOSE)

MUTATION_RE = re.compile(
    CHAIN_NAME_REGEX + r"""
    :
    (?P<resnum>-?\d+)
    (?P<inscode>[a-zA-Z]{1})?  # optional
    ->
    (?P<new_resname>(%s))
    """ % ('|'.join(SUPPORTED_RESIDUES)), re.VERBOSE)

PROP_CONVERSIONS = {
    'e_pot': 'Potential_energy',
    'e_internal': 'Internal_energy',
    'e_interaction': 'Interaction_energy',
    'prime_energy': 'Prime_Energy',
    'prime_coulomb': 'Prime_Coulomb',
    'prime_cov': 'Prime_Covalent',
    'prime_vdw': 'Prime_vdW',
    'prime_gb': 'Prime_Solv_GB',
    'prime_sa': 'Prime_Solv_SA',
    'prime_lipo': 'Prime_Lipo',
    'prime_hbond': 'Prime_Hbond',
    'prime_packing': 'Prime_Packing',
    'prime_selfcont': 'Prime_SelfCont',
    'affinity_coulomb': 'Affinity_Coulomb',
    'affinity_cov': 'Affinity_Covalent',
    'affinity_vdw': 'Affinity_vdW',
    'affinity_gb': 'Affinity_Solv_GB',
    'affinity_sa': 'Affinity_Solv_SA',
    'affinity_lipo': 'Affinity_Lipo',
    'affinity_hbond': 'Affinity_Hbond',
    'affinity_packing': 'Affinity_Packing',
    'affinity_selfcont': 'Affinity_SelfCont',
    'stability_coulomb': 'Stability_Coulomb',
    'stability_cov': 'Stability_Covalent',
    'stability_vdw': 'Stability_vdW',
    'stability_gb': 'Stability_Solv_GB',
    'stability_sa': 'Stability_Solv_SA',
    'stability_lipo': 'Stability_Lipo',
    'stability_hbond': 'Stability_Hbond',
    'stability_packing': 'Stability_Packing',
    'stability_selfcont': 'Stability_SelfCont',
    'stability_reference': 'Stability_Reference',
    'pka': 'pKa',
    'atomic_sasa_polar': 'Atomic_SASA_(polar)',
    'atomic_sasa_nonpolar': 'Atomic_SASA_(nonpolar)',
    'sasa_polar': 'SASA_(polar)',
    'sasa_nonpolar': 'SASA_(nonpolar)',
    'sasa_total': 'SASA_(total)',
    'hydropathy': 'Hydropathy',
    'rotatable': 'Total_rotatable_bonds',
    'vdw_surf_comp': 'vdW_Surface_Complementarity',
    # The following are queued in for inclusion but no backends have been added
    #    'complementarity'      : 'Complentarity',
    #    'solubility'           : 'Solubility',
    #    'aggregation'          : 'Aggregation',
}

#- Functions -----------------------------------------------------------------


[docs]def find_residue_atom(st, chain, resnum, inscode):
    # Find an atom from this residue in the CT:
    for a in st.atom:
        if a.chain == chain and a.resnum == resnum and a.inscode == inscode:
            return a
    raise ValueError("Failed to find residue: %s:%i%s" %
                     (chain, resnum, inscode))


[docs]def get_residue_asl(residue, ca=False):
    """
    Creates an ASL based on a residue's chain, residue number and inscode.
    The ASL can optionally only include the alpha carbon of the residue.

    :param residue: The residue to create an ASL for
    :type  residue: `schrodinger.structure._Residue`
    :raise RuntimeError: If the passed in residue is incorrect type

    :return: ASL expression for residue
    :rtype: str

    """
    if not isinstance(residue, structure._Residue):
        err = 'Residues must be a structure._Residue object'
        raise RuntimeError(err)

    # We need the extra "(" and ")". If you want to link these together
    # you will want to join with OR
    asl = '((chain.name "%s")' % residue.chain
    asl += ' AND (res.num %d)' % residue.resnum
    asl += ' AND (res.ins "%s")' % residue.inscode
    if ca:
        asl += ' AND (atom.ptype " CA "))'
    else:
        asl += ')'

    return asl


[docs]def get_residues_asl(residues, ca=False):
    """
    Creates an ASL based on a list of residue's chains, residue numbers
    and inscodes. The ASL can optionally only include the alpha carbon of the
    residue.

    :param residue: The residues to create an ASL for
    :type  residue: list or tuple of `schrodinger.structure._Residue`
    :raise RuntimeError: If residues are not a list or tuple
    :raise RuntimeError: If any passed in residues are incorrect type

    :return: ASL expression for all residues
    :rtype: str

    """
    residues_asls = []
    if not isinstance(residues, (list, tuple)):
        err = 'A list or tuple is required to be passed in to '
        err += '"get_residues_asl" function.'
        raise RuntimeError(err)
    # We are using _Residue objects
    for res in residues:
        residues_asls.append(get_residue_asl(res, ca=ca))

    # Join all the residues with an OR
    residues_asl = ' OR '.join(residues_asls)

    return residues_asl


[docs]def valid_asl(st, asl):
    """
    Returns True/False depending on whether the asl is a valid expression or
    not.

    """
    try:
        analyze.evaluate_asl(st, asl)
        return True
    except MmException:
        return False


[docs]def get_residues_within(st, residues, within=0.0, ca=False):
    """
    Returns a list of residues for `st` that are within `within`
    angstroms of each residue. If the `ca` keyword is True the within
    calculation will only look for alpha carbon in `residues`. This means
    that if `within` is set to 5.5 angstroms and there is only a single
    atom that belongs to a residue at that cutoff, the residue that the
    atom belongs to will be refined.

    :param st: Structure to evaluate and which all `residues` correspond
    :type  st: `schrodinger.structure.Structure`
    :param residues: All residues targeted for refinement
    :type  residues: list or tuple of `schrodinger.structure._Residue`
    :param within: Distance (angstroms) of residues to include in refinement
    :type  within: float
    :param ca: Use only alpha carbons to find residues within
    :type  ca: bool

    :returns: List of `schrodinger.structure._Residue` objects
    :rtype: list

    """
    residue_asls = get_residues_asl(residues, ca=ca)

    within_asl = 'within %.2f (%s)' % (within, residue_asls)
    atom_indices = analyze.evaluate_asl(st, within_asl)

    # Make a list (dict's values) of residues to refine retaining the order
    # passed in
    residue_map = {}
    for idx in atom_indices:
        atom = st.atom[idx]
        chain = atom.chain.strip() or '_'
        key = '%s:%s' % (chain, atom.resnum)
        # We create a key to make sure we only have one residue
        # since there is no way to do: if atom.getResidue() in list
        residue_map[key] = atom.getResidue()

    return list(residue_map.values())


[docs]def residue_is_polar(residue):
    """
    Tests whether a residue is polar

    :param residue: Residue to test
    :type  residue: `structure._Residue`

    :rtype: bool

    """
    pdb_name = residue.pdbres.strip()
    if pdb_name in POLAR_RESIDUES:
        return True
    else:
        return False


[docs]def residue_is_nonpolar(residue):
    """
    Tests whether a residue is nonpolar (for SASA)

    :param residue: Residue to test
    :type  residue: `structure._Residue`

    :rtype: bool

    """
    pdb_name = residue.pdbres.strip()
    if pdb_name in SASA_NONPOLAR_RESIDUES:
        return True
    else:
        return False


[docs]def atom_is_nonpolar(atom):
    """
    Returns true if the atom is considered non-polar. Here are the rules for
    non-polar atoms:

    - The atom's element is a C or S
    - The atom's element is a H and one bonded atom's element is C or S

    :type atom: `schrodinger.structure._StructureAtom`

    """
    if atom.element in NONPOLAR_HEAVY_ELEMENTS:
        return True

    if atom.element == 'H':
        for a in atom.bonded_atoms:
            if a.element in NONPOLAR_HEAVY_ELEMENTS:
                return True

    return False


#- Classes -------------------------------------------------------------------


[docs]class PrimeConfig(prime_input.Prime):
    """
    Class containing the methods to write Prime input files.
    NOTE THAT THIS ALWAYS USES OPLS2005
    """

    ALL_RESIDUES = 'all'

[docs]    def __init__(self, st_filename, set_defaults=True, **kwargs):

        self.st_filename = st_filename

        if set_defaults:
            self.defaults = {
                'STRUCT_FILE': st_filename,
                'JOB_TYPE': 'REFINE',
                'USE_CRYSTAL_SYMMETRY': 'no',
                'USE_RANDOM_SEED': 'no',
                'SEED': 0,
                'INT_DIEL': 1.00,
                'EXT_DIEL': 80.00,
                'SGB_MOD': 'vsgb2.0',
                'OPLS_VERSION': '2005',
                'USE_MEMBRANE': 'no'
            }
            for k, v in kwargs.items():
                if k in self.defaults:
                    self.defaults[k] = v
        else:
            self.defaults = {}

        super(PrimeConfig, self).__init__(kw=self.defaults)

[docs]    def addResidues(self, residues=None):
        """
        Adds residues to consider for refinement.  The passed in argument can
        take the form of:

        - ASL expression
        - List of `schrodinger.structure._Residue` objects
        - 'all'
        - None

        """
        if residues is None:
            self['SELECT'] = 'all'
        else:
            self['SELECT'] = 'pick'
            for i, res in enumerate(residues):
                self['RESIDUE_' + str(i)] = res

[docs]    def prepEnergy(self):

        self['PRIME_TYPE'] = 'ENERGY'

[docs]    def prepMinimize(self, residues=None):

        self['PRIME_TYPE'] = 'REAL_MIN'
        self['ENERGY_BREAKDOWN'] = 'yes'

        self.addResidues(residues)

[docs]    def prepResidue(self, residues=None):

        self['PRIME_TYPE'] = 'SIDE_PRED'
        self['NUM_SC_OUTPUT_STRUCT'] = 1
        self['RESIDUE_BACKMIN'] = 'yes'

        self.addResidues(residues)

[docs]    def prepSidechain(self, residues=None):

        self['PRIME_TYPE'] = 'SIDE_PRED'
        self['NUM_SC_OUTPUT_STRUCT'] = 1

        self.addResidues(residues)

[docs]    def prepSidechainCBeta(self, residues=None):

        self.prepSidechain(residues=residues)

        self['SAMPLE_CBETA'] = 'yes'
        self['MAXCONEANG'] = 30.0

[docs]    def prepSidechainBB(self, residues=None):

        self.prepSidechain(residues=residues)

        self['SAMPLE_BACKBONE'] = 'yes'
        self['BACKBONELEN'] = 3

[docs]    def prepActive(self, lig_id, residues=None):

        self['PRIME_TYPE'] = 'SITE_OPT'
        self['LIGAND'] = lig_id
        self['NPASSES'] = 1

        self.addResidues(residues)

[docs]    def prepLoop(self,
                 start_res=None,
                 end_res=None,
                 res_sphere=7.5,
                 maxcalpha=None,
                 protocol='LOOP_BLD',
                 loop2=None,
                 max_jobs=0,
                 residues=None):
        """
        :type start_res: string
        :param start_res: loop start residue, e.g. A:15

        :type end_res: string
        :param end_res: loop start residue, e.g. A:20

        :type res_sphere: float
        :param res_sphere: radius of nearby residue refinement

        :type maxcalpha: float
        :param maxcalpha: CA atom movement constraint

        :type protocol: string
        :param protocol: loop refinement protocol

        :type loop2: list
        :param loop2: the definition of the second loop, e.g. ['A:4','A:6']

        :type residues: None
        :param residues: Unused, kept for API compatibility

        :type max_jobs: int
        :param max_jobs: how many processes will be run simultaneously
        """

        self['PRIME_TYPE'] = protocol
        self['LOOP_0_RES_0'] = start_res
        self['LOOP_0_RES_1'] = end_res
        self['RES_SPHERE'] = res_sphere
        if maxcalpha is not None:
            self['MAX_CA_MOVEMENT'] = maxcalpha
        self['MIN_OVERLAP'] = 0.70
        self['NPASSES'] = 1
        if protocol == 'LOOP_PAIR':  # cooperative loop sampling
            if loop2 is None:
                raise ValueError('A second loop  must be defined')
            self['LOOP_1_RES_0'] = loop2[0]
            self['LOOP_1_RES_1'] = loop2[1]
        if max_jobs != 0:
            self['MAX_JOBS'] = max_jobs

[docs]    def prepAntibodyLoop(self,
                         start_res=None,
                         end_res=None,
                         cpus=1,
                         residues=None):

        self['PRIME_TYPE'] = 4
        self['LOOP_0_RES_0'] = start_res
        self['LOOP_0_RES_1'] = end_res
        self['RES_SPHERE'] = 5.0
        self['NUM_FIXED_STAGE'] = 5
        self['MAX_CA_REF1'] = 6.0
        self['MAX_CA_REF2'] = 3.0
        self['SGB_MOD'] = 'vsgb2.0'
        self['THREADS'] = 10
        self['NUM_OUTPUT_STRUCT'] = 10
        self['MAX_JOBS'] = cpus

        end_resnum = int(end_res.split(':')[-1])
        start_resnum = int(start_res.split(':')[-1])

        total_residues = (start_resnum - end_resnum) + 1

        if total_residues >= 12:
            self['MIN_OVERLAP'] = 0.50
            self['OFAC_INIT_STAGE'] = '0.40/0.45/0.5/0.55/0.60'
        else:
            self['MIN_OVERLAP'] = 0.60
            self['OFAC_INIT_STAGE'] = '0.50/0.55/0.60/0.65/0.70'

        del self['INT_DIEL']

[docs]    def prepBldStruct(self, jobname, dirname):

        self['JOB_TYPE'] = 'MODEL'
        self['QUERY_OFFSET'] = 0
        self['TEMPLATE_NAME'] = jobname
        self['%s_NUMBER' % jobname] = 1

        self['%s_STRUCT_FILE' % jobname] = '%s.ent' % jobname
        self['%s_ALIGN_FILE' % jobname] = '%s.aln' % jobname

        self['DIR_NAME'] = dirname
        self['TAILS'] = 0
        self['SIDE_OPT'] = 'true'
        self['MINIMIZE'] = 'true'
        self['BUILD_INSERTIONS'] = 'false'
        self['MAX_INSERTION_SIZE'] = 20
        self['BUILD_TRANSITIONS'] = 'true'
        self['BUILD_DELETIONS'] = 'true'
        self['KEEP_ROTAMERS'] = 'true'
        self['KNOWLEDGE_BASED'] = 'true'
        self['NUM_OUTPUT_STRUCT'] = 10


[docs]class PrimeStructure:

[docs]    def __init__(self, jobname):

        self.jobname = jobname

[docs]    def createTemplateFile(self, template_seq, filename=None):
        """ Writes a template PDB file as .ent """

        if not filename:
            filename = "%s.ent" % self.jobname

        with open(filename, "w") as fh:
            # The Residue.structure list keeps structure information
            # in PDB format, so we just need to write out these lines.
            for res in template_seq.residues:
                fh.writelines(res.structure)
            fh.writelines(["TER\n"])

        return filename

[docs]    def createAlignFile(self, reference_seq, template_seq, filename=None):
        """
        Writes an alignment file for the template. If no filename is
        supplied the file will be named <jobname>.aln.

        :param reference_seq: The reference sequence
        :type  reference_seq: `sequence<Sequence>`
        :param  template_seq: The template sequence
        :type   template_seq: `sequence<Sequence>`

        """
        if not filename:
            filename = "%s.aln" % self.jobname

        # Make a reference and template sequence line. Replace gaps
        # symbols with dots.
        reference_txt = "ProbeAA: "
        reference_txt += reference_seq.text().replace("~",
                                                      ".").replace("-", ".")
        reference_txt += '\n'

        template_txt = "Fold AA: "
        template_txt += template_seq.text().replace("~", ".").replace("-", ".")
        template_txt += '\n'

        with open(filename, 'w') as fh:
            fh.writelines(reference_txt)
            fh.writelines(template_txt)

        return filename


[docs]class PropkaError(Exception):
    """ A custom exception for any propka failures """


[docs]class OrderedResidueDict(OrderedDict):
    """ Creates an ordered dictionary for residues in a structure """

[docs]    def __init__(self, residues, default_value=None):

        super(OrderedResidueDict, self).__init__()

        for res in residues:
            self[res] = default_value


[docs]class PropertyCalculator:
    """
    Class for calculating properties of proteins and protein residues.

    Here is an example of how to calculate properties for a protein::

      from schrodinger import structure
      from schrodinger.application.bioluminate import protein

      # Get the input structure
      st = structure.Structure.read('receptor.maegz')

      # Define the properties to calculate
      calculations = [ 'e_pot', 'e_internal', 'e_interaction', 'prime_energy',
                       'pka', 'sasa_polar', 'sasa_nonpolar', 'sasa_total']

      # Create the calculator
      calculator = protein.PropertyCalculator(st, "my_calculator_jobname")

      # Calculate the properties
      properties = calculator.calculate(*calculations)

    In the example above the `properties` output would look something like
    this::

      properties = {
          'e_pot'         : 1573.4,
          'e_internal'    : 624.7,
          'e_interaction' : 994.8,
          'prime_energy'  : 744.2,
          'pka'           : 124.1,
          'sasa_polar',   : 3122.3,
          'sasa_nonpolar' : 271.1,
          'sasa_total'    : 3393.4
      }

    """
    # All the aggregate calculations available
    AGGREGATE_CALCULATIONS = [
        'e_pot',
        # FIXME: These 2 need to have methods for getting totals Ev:133399
        #        'e_internal',
        #        'e_interaction',
        'prime_energy',
        'pka',
        'sasa_polar',
        'sasa_nonpolar',
        'sasa_total',
        'hydropathy',
        'rotatable',
        'vdw_surf_comp',
        #        'complementarity',
        #        'solubility',
        #        'aggregation'
    ]

    # All the calculations available that iterate over each residue
    RESIDUE_CALCULATIONS = [
        'e_pot',
        'e_internal',
        'e_interaction',
        'pka',
        'sasa_polar',
        'sasa_nonpolar',
        'sasa_total',
        'hydropathy',
        'rotatable',
        'vdw_surf_comp',
        #        'complementarity',
        #        'solubility',
        #        'aggregation'
    ]

[docs]    def __init__(self,
                 struct,
                 jobname,
                 cleanup=True,
                 nbcutoff=14.0,
                 residues=None,
                 lig_asl=None):
        """
        Construct a `ProteinCalculator` class from a structure file and a
        jobname.

        :param struct: The protein structure or protein/ligand structures
        :type  struct: `schrodinger.structure.Structure` object

        :param jobname: The jobname that will be used for all calculations that
                        require output files.

        :param residues: An iterable of _Residue objects to analyze. If not
                         specified, all residues in the structure are considered.
        :type residues: Iterable of `schrodinger.structure._Residue` objects.

        :param lig_asl: The ASL for the ligand substructure. Used for
                        calculating the vdW surface complementarity.
        :type lig_asl: str
        """
        self.struct = struct
        self.jobname = jobname
        self._cleanup = cleanup
        self._min_nbcutoff = nbcutoff
        self._lig_asl = lig_asl

        self._minimizer = None
        """
        Private variable that stores the minimizer. We don't create it here
        since some applications that use this class will not need it.

        """

        if residues is None:
            # Make a list of the residues and use these to iterate over.
            self.residues = [res for res in self.struct.residue]
        else:
            self.residues = list(residues)

        #    self.struct._proxy

        # Properties that hold the calculated data. Each of these will be
        # an OrderedDict with the keys being the structure's residue objects.
        # We store these as ordered dicts so scripts using this can access the
        # calculated data in the same order as the residues in the protein. A
        # normal dict would work if the structure._ResidueIterable returned
        # the same residue object but it doesn't.
        self.e_pot_data = OrderedResidueDict(self.residues)
        self.e_internal_data = OrderedResidueDict(self.residues)
        self.e_interaction_data = OrderedResidueDict(self.residues)
        self.atomic_nonpolar_sasa_data = OrderedResidueDict(self.residues)
        self.atomic_polar_sasa_data = OrderedResidueDict(self.residues)
        self.sasa_data = OrderedResidueDict(self.residues)
        self.hydropathy_data = OrderedResidueDict(self.residues)
        self.rotatable_data = OrderedResidueDict(self.residues)
        self.vdw_surf_comp_data = OrderedResidueDict(self.residues)
        self.pka_data = OrderedResidueDict(self.residues)

        # These calculated properties only exist in aggregate form, there
        # is no value for individual residues.
        self.prime_energy_total = None
        self.e_pot_total = None
        self.sasa_total = None
        self.sasa_polar_total = None
        self.sasa_nonpolar_total = None
        self.hydropathy_total = None
        self.rotatable_total = None
        self.vdw_surf_comp_total = None
        self.pka_total = None

        # This is used for all SASA calculations
        self._atomic_sasa_list = None

        self._vdw_surf_comp_by_atom = None

        self.progress = (0, 0)
        """
        Variable that can be used to get the progress of calculations. This
        variable is only set in `self.calculateOverResidues`. Since that
        method returns a generator, each step can query `self.progress`
        to get a description of the progress.  This variable is a tuple
        with the form ( step, total steps ).

        """

    @property
    def minimizer(self):
        """ The minimizer used in energy calculations. """

        if not self._minimizer:
            self._minimizer = minimize.Minimizer(
                struct=self.struct,
                nonbonded_cutoff=self._min_nbcutoff,
                ffld_version=minimize.OPLS_2005,
                cleanup=False)  # SHARED-5335, BIOLUM-3602
            # Auto fixing of lewis causes charged cysteines
            # turning cleanup=False to restore old behavior

        return self._minimizer

    def _prepInscode(self):
        """
        Private method to renumber residues with inscodes so that
        propka can handle them.  The returned st is renumbered and
        the renumbering map is also returned and used in `self.setpKaData`

        """
        new_ct = self.struct.copy()

        problem_res = []
        used_resnums = set()
        for residue in new_ct.residue:
            if residue.inscode != ' ':
                problem_res.append(residue)
            used_resnums.add(residue.chain + ':' + str(residue.resnum))

        temp_resnums = {}
        for res in problem_res:
            old_name = res.chain + ':' + str(res.resnum) + res.inscode
            for proposed_resnum in range(9999, -9999, -1):
                proposed_name = res.chain + ':' + str(proposed_resnum)
                if proposed_name not in used_resnums:
                    for atom in res.atom:
                        atom.resnum = proposed_resnum
                        atom.inscode = ' '
                    temp_resnums[old_name] = proposed_name
                    used_resnums.add(proposed_name)
                    break

        return (new_ct, temp_resnums)

[docs]    def runpKa(self):
        """
        Runs PROPKA to get the pKa of all residues in the `self.struct`,
        then sets `self.pka_data`.

        """
        # PROPKA can get stuck in an infinite loop when there are severe
        # clashes. This clashes can be introduced in unforesoon ways, such
        # as multiconformer ligands where each conformer has a different
        # atom constituency, i.e. microheterogeneity for ligands.
        overlapping_atom_pairs = analyze.find_overlapping_atoms(
            self.struct,
            ignore_hydrogens=True,
            ignore_waters=True,
            dist_threshold=0.5)
        if len(overlapping_atom_pairs) > 5:
            raise PropkaError(
                "Multiple severe heavy clashes found. Aborting PROPKA calculation."
            )
        st, renum_map = self._prepInscode()

        # Find a jobname for PROPKA that is not in use yet:
        i = 0
        while True:
            i += 1
            propka_jobname = os.path.abspath("%s_propka-%i" % (self.jobname, i))
            ipdb = propka_jobname + ".pdb"
            if not os.path.isfile(ipdb):
                break  # This jobname is not in use yet

        st.write(ipdb)

        ifile = propka_jobname + ".propka_input"
        ofile = propka_jobname + ".pka"
        logfile = propka_jobname + ".log"

        # So that these files get cleaned up when the job completes:
        temp_files = [ipdb, ifile, ofile, logfile]

        try:
            stdout, stderr = run_propka.run_propka(ipdb)
        except Exception as err:
            raise PropkaError('Error running PROPKA:\n%s' % err)

        # Save output to a log file (for debugging):
        with open(logfile, "w") as fh:
            fh.write(str(stdout))
            fh.write(str(stderr))

        if stderr:
            raise PropkaError(
                f'Error running PROPKA. Log file: {logfile}.\n Stderr: {stderr}'
            )
        if not os.path.isfile(ofile):
            raise PropkaError('PROPKA output file not found. Log file: %s' %
                              logfile)

        # We are only interested in:
        #  'resname', 'resnum', 'chain', 'pKa'
        headers = propka_parse.SUMMARY_HEADER[:4]
        header, summary = propka_parse.get_summary(ofile, headers)

        self.setpKaData(summary, renum_map=renum_map)

        # Clean up the output files
        if self._cleanup:
            for f in temp_files:
                if os.path.isfile(f):
                    fileutils.force_remove(f)

[docs]    def getResiduepKa(self, residue):
        """
        Returns the pKa for specified residue

        :param residue: Residue to get internal energy for
        :type  residue: `structure._Residue`

        :rtype: float

        """
        if not any(self.pka_data.values()):
            self.runpKa()

        return self.pka_data.get(residue)

[docs]    def getTotalpKa(self):
        """
        Gets the sum of the pKa values for the protein.

        :rtype: float

        """
        if self.pka_total is not None:
            return self.pka_total

        if not any(self.pka_data.values()):
            self.runpKa()

        values = [float(v) for v in self.pka_data.values() if v is not None]

        self.pka_total = sum(values)

        return self.pka_total

[docs]    def setpKaData(self, summary, renum_map=None):
        """
        Compares residues from the PROPKA summary with the residues in
        `self.residues` and when matches are found the summary's pKa
        is set for that residue in `self.pka_data`

        """
        for res in self.residues:
            maestro_resid = '%s:%s' % (res.chain, res.resnum)

            if renum_map:
                name = res.chain + ':' + str(res.resnum) + res.inscode
                match = renum_map.get(name)
                if match:
                    maestro_resid = match

            # PROPKA will assign a chain "_" if none is found
            maestro_resid = maestro_resid.replace(' ', '_')
            for idx, row in enumerate(summary):
                # Skip terminals
                resname = row[0]
                if resname == 'C-':  # or resname == 'N+':
                    # NOTE: We need to include the N+ terminal to fix
                    # PPREP-925. This is because PROPKA actually marks
                    # the first residue in the chain as "N+", no matter
                    # what type it is.  It's different for "C-" residues.
                    continue

                resnum = row[1]
                chain = row[2]
                pKa = row[3]

                propka_resid = '%s:%s' % (chain, resnum)

                if propka_resid == maestro_resid:
                    self.pka_data[res] = pKa
                    del summary[idx]
                    break

[docs]    def getTotalPrimeEnergy(self):
        """
        Run Prime Minimization on `self.struct`.  This will launch a job
        using job control. After the job completes the total energy will be
        taken from the first CT using the "r_psp_Prime_Energy" property.

        :returns: Prime energy of protein
        :rtype: float

        """
        if self.prime_energy_total is not None:
            return self.prime_energy_total

        refiner = Refiner(self.struct)
        output_st = refiner.runPrimeMinimization(self.jobname)

        self.prime_energy_total = output_st.property.get('r_psp_Prime_Energy')

        if self._cleanup:
            refiner.clean()

        return self.prime_energy_total

[docs]    def getPrimeEnergyByResidues(self, residues):
        """
        Run Prime Minimization on `self.struct` only minimizing the residues
        in `residues`.  This will launch a job using job control. After the
        job completes the total energy will be taken from the first CT using
        the "r_psp_Prime_Energy" property.

        :param residues: Residues to minimize
        :type  residues: list of `residues<structure._Residue>`

        :returns: Prime energy of protein
        :rtype: float

        """
        internal_residues = set()
        for res in residues:
            resnum = res.resnum
            residx = int(resnum) - 1
            internal_residues.add(self.residues[residx])

        refiner = Refiner(self.struct, residues=internal_residues)
        output_st = refiner.runPrimeMinimization(self.jobname)

        prime_energy_total = output_st.property.get('r_psp_Prime_Energy')

        if self._cleanup:
            refiner.clean()

        return prime_energy_total

[docs]    def getResiduePotentialEnergy(self, residue):
        """
        Return the potential energy for a residue.

        :param residue: Residue to get potential energy for
        :type  residue: `structure._Residue`

        :rtype: float

        """
        e_pot = self.e_pot_data.get(residue)

        if e_pot is None:
            self._calculateEnergiesByResidue(residue)

        e_pot = self.e_pot_data[residue]

        return e_pot

[docs]    def getPotentialEnergyGenerator(self):
        """
        Return a generator that iterates over each residue in `self.struct`
        yielding the `schrodinger.structure._Residue` object and it's
        potential energy.

        :rtype: generator
        :see: `schrodinger.structutils.minimize.Minimizer.getSelfEnergy`
        :see: `schrodinger.structutils.minimize.Minimizer.getInteractionEnergy`

        """
        for res in self.residues:
            e_pot = self.getResiduePotentialEnergy(res)
            yield (res, e_pot)

[docs]    def getTotalPotentialEnergy(self):
        """
        Get the potential energy of `self.struct` which is calculated using
        `schrodinger.structutils.minimize.Minimizer`. The potential energy
        is the sum of the internal energies and the interaction energies.

        :return: Total potential energy of all the residues
        :rtype: float

        :see: `schrodinger.structutils.minimize.Minimizer.getSelfEnergy`
        :see: `schrodinger.structutils.minimize.Minimizer.getInteractionEnergy`

        """
        if self.e_pot_total is None:
            self.e_pot_total = self.minimizer.getEnergy()

        return self.e_pot_total

[docs]    def getResidueInternalEnergy(self, residue):
        """
        Return the residue's internal energy.

        :param residue: Residue to get internal energy for
        :type  residue: `structure._Residue`

        :rtype: float
        :see: `schrodinger.structutils.minimize.Minimizer.getSelfEnergy`

        """

        e_internal = self.e_internal_data.get(residue)

        if e_internal is None:
            self._calculateEnergiesByResidue(residue)

        e_internal = self.e_internal_data[residue]
        return e_internal

[docs]    def getInternalEnergyGenerator(self):
        """
        Return a generator that iterates over each residue in `self.struct`.
        This yields the `schrodinger.structure._Residue` object and it's
        internal energy.

        :rtype: generator
        :see: `schrodinger.structutils.minimize.Minimizer.getSelfEnergy`

        """
        for res in self.residues:
            e_internal = self.getResidueInternalEnergy(res)
            yield (res, e_internal)

    def _calculateEnergiesByResidue(self, res):

        #DEBUG print "_calculateEnergiesByResidue() called"

        min = self.minimizer

        atom_subset = res.getAtomIndices()

        e_bonded = sum(min.getBondedEnergies(atom_subset))

        (intra_nb_energy,
         inter_nb_energy) = min.getNonBondedEnergies(atom_subset)
        e_internal = e_bonded + intra_nb_energy
        e_interact = inter_nb_energy

        self.e_interaction_data[res] = e_interact
        self.e_internal_data[res] = e_internal
        self.e_pot_data[res] = e_interact + e_internal

[docs]    def getResidueInteractionEnergy(self, residue):
        """
        Return the residue's interaction energy.

        :param residue: Residue to get interaction energy for
        :type  residue: `structure._Residue`

        :rtype: float
        :see: `schrodinger.structutils.minimize.Minimizer.getInteractionEnergy`

        """

        e_interact = self.e_interaction_data.get(residue)

        if e_interact is None:
            self._calculateEnergiesByResidue(residue)

        e_interact = self.e_interaction_data[residue]
        return e_interact

[docs]    def getInteractionEnergyGenerator(self):
        """
        Return a generator that iterates over each residue in `self.struct`.
        This yields the `schrodinger.structure._Residue` object and it's
        interaction energy.

        :rtype: generator
        :see: `schrodinger.structutils.minimize.Minimizer.getInteractionEnergy`

        """
        for res in self.residues:
            e_interact = self.getResidueInteractionEnergy(res)
            yield (res, e_interact)

    def _calculateAtomicSasaList(self):
        """
        Calculate SASA for all atoms in the structure, and save them.
        """
        self._atomic_sasa_list = analyze.calculate_sasa_by_atom(self.struct)

    def _sumSasaForAtoms(self, atoms=None):
        """
        Sum up the pre-calculated SASAs for the given atoms. If `atoms` is
        `None`, the sum of all atoms will be returned.

        """
        # Make sure we have the data we need.
        if self._atomic_sasa_list is None:
            self._calculateAtomicSasaList()

        if atoms is None:
            return sum(self._atomic_sasa_list)

        sasa = 0.0
        for anum in atoms:
            sasa += self._atomic_sasa_list[anum - 1]
        return sasa

[docs]    def getResidueAtomicPolarSASA(self, residue, sidechain=False):
        """
        Returns SASA for all polar atoms in residue

        :param residue: Residue to get atomic polar SASA contribution for
        :type  residue: `structure._Residue`
        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: float

        """
        # If we already have it calculated return it
        sasa = self.atomic_polar_sasa_data.get(residue)
        if sasa is not None:
            return sasa

        # Get the polar atom indices for the residue
        atom_idx = [
            str(a.index) for a in residue.atom if not atom_is_nonpolar(a)
        ]

        if not atom_idx:
            return 0.0

        # Create the asl expressions
        asl = '(atom.num %s)' % ','.join(atom_idx)
        if sidechain:
            asl += ' and (sidechain)'

        # Evaluate the asl expressions to get the atoms to calculate the
        # SASA for
        polar_indices = analyze.evaluate_asl(self.struct, asl)
        sasa = self._sumSasaForAtoms(polar_indices)

        self.atomic_polar_sasa_data[residue] = sasa

        return sasa

[docs]    def getAtomicPolarSASAGenerator(self, sidechain=False):
        """
        Returns a generator that yields the `schrodinger.structure._Residue`
        object and its calculated SASA for only the polar atoms in each
        residue in `self.struct`.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: generator

        """
        for res in self.residues:
            sasa = self.getResidueAtomicPolarSASA(res, sidechain=sidechain)
            yield (res, sasa)

[docs]    def getResidueAtomicNonPolarSASA(self, residue, sidechain=False):
        """
        Returns SASA for only the nonpolar atoms in residue

        :param residue: Residue to get atomic nonpolar SASA contribution for
        :type  residue: `structure._Residue`
        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: float

        """
        # If we already have it calculated return it
        sasa = self.atomic_nonpolar_sasa_data.get(residue)
        if sasa is not None:
            return sasa

        # Get the polar atom indices for the residue
        atom_idx = [str(a.index) for a in residue.atom if atom_is_nonpolar(a)]

        if not atom_idx:
            return 0.0

        # Create the asl expressions
        asl = '(atom.num %s)' % ','.join(atom_idx)
        if sidechain:
            asl += ' and (sidechain)'

        # Evaluate the asl expressions to get the atoms to calculate the
        # SASA for
        nonpolar_indices = analyze.evaluate_asl(self.struct, asl)
        sasa = self._sumSasaForAtoms(nonpolar_indices)

        self.atomic_nonpolar_sasa_data[residue] = sasa

        return sasa

[docs]    def getAtomicNonPolarSASAGenerator(self, sidechain=False):
        """
        Returns a generator that yields the `schrodinger.structure._Residue`
        object and its calculated SASA for only the nonpolar atoms in each
        residue in `self.struct`.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: generator

        """
        for res in self.residues:
            # Get the polar atom indices for the residue
            nonpolar_res_atoms = [
                str(a.index) for a in res.atom if atom_is_nonpolar(a)
            ]
            if nonpolar_res_atoms:
                # If this residue has non-polar atoms
                # Create the asl expressions
                asl = '(atom.num %s)' % ','.join(nonpolar_res_atoms)
                if sidechain:
                    asl += ' and (sidechain)'

                # Evaluate the asl expressions to get the atoms to calculate the
                # SASA for
                nonpolar_indices = analyze.evaluate_asl(self.struct, asl)
                nonpolar_sasa = self._sumSasaForAtoms(nonpolar_indices)
            else:
                nonpolar_sasa = 0.0

            self.atomic_nonpolar_sasa_data[res] = nonpolar_sasa

            yield (res, nonpolar_sasa)

[docs]    def getResidueSASA(self, residue, sidechain=False):
        """
        Returns the SASA for residue.

        :param residue: Residue to get SASA for
        :type  residue: `structure._Residue`
        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: float

        """
        # If we already have it calculated return it
        sasa = self.sasa_data.get(residue)
        if sasa is not None:
            return sasa

        res_atom_indices = [str(a.index) for a in residue.atom]

        if not res_atom_indices:
            return 0.0

        asl = '(atom.num %s)' % ','.join(res_atom_indices)
        if sidechain:
            asl += ' and (sidechain)'

        atom_indices = analyze.evaluate_asl(self.struct, asl)
        sasa = self._sumSasaForAtoms(atom_indices)

        self.sasa_data[residue] = sasa

        return sasa

[docs]    def getSASAPolarGenerator(self, sidechain=False):
        """
        Returns a generator that yields the `schrodinger.structure._Residue`
        object and its calculated SASA for each polar residue in
        `self.struct`.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: generator

        """
        for res in self.residues:
            if not residue_is_polar(res):
                continue

            sasa = self.getResidueSASA(res, sidechain=sidechain)

            yield (res, sasa)

[docs]    def getTotalSASAPolar(self, sidechain=False):
        """
        Returns the total approximate solvent accessible surface area for
        all polar residues.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool
        :rtype: float

        """
        # If we already have it calculated return it
        if self.sasa_polar_total is None:
            asl = '(res. %s)' % ','.join(POLAR_RESIDUES)
            if sidechain:
                asl += ' and (sidechain)'

            atom_indices = analyze.evaluate_asl(self.struct, asl)

            if not atom_indices:
                return 0.0

            self.sasa_polar_total = self._sumSasaForAtoms(atom_indices)

        return self.sasa_polar_total

[docs]    def getSASANonPolarGenerator(self, sidechain=False):
        """
        Returns a generator that yields the `schrodinger.structure._Residue`
        object and its calculated SASA for each nonpolar residue in
        `self.struct`.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: generator

        """
        for res in self.residues:
            if not residue_is_nonpolar(res):
                continue

            sasa = self.getResidueSASA(res, sidechain=sidechain)

            yield (res, sasa)

[docs]    def getTotalSASANonPolar(self, sidechain=False):
        """
        Returns the total approximate solvent accessible surface area for all
        non-polar residues.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool
        :rtype: float

        """
        # If we already have it calculated return it
        if self.sasa_nonpolar_total is None:
            asl = '(res. %s)' % ','.join(SASA_NONPOLAR_RESIDUES)
            if sidechain:
                asl += ' and (sidechain)'

            atom_indices = analyze.evaluate_asl(self.struct, asl)

            if not atom_indices:
                return 0.0

            self.sasa_nonpolar_total = self._sumSasaForAtoms(atom_indices)

        return self.sasa_nonpolar_total

[docs]    def getSASAGenerator(self, sidechain=False):
        """
        Returns a generator that yields the `schrodinger.structure._Residue`
        object and its calculated SASA for each residue in `self.struct`.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: generator

        """
        for res in self.residues:
            sasa = self.getResidueSASA(res, sidechain=sidechain)
            yield (res, sasa)

[docs]    def getTotalSASA(self, sidechain=False):
        """
        Returns the total approximate solvent accessible surface area for all
        residues.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool
        :rtype: float

        """
        # If we already have it calculated return it
        if self.sasa_total is None:
            if sidechain:
                asl = '(sidechain)'  # NOTE: everything that matches "sidechain" is already part of a "protein".
            else:
                asl = "(protein)"  # See BIOLUM-1219
            atom_indices = analyze.evaluate_asl(self.struct, asl)
            self.sasa_total = self._sumSasaForAtoms(atom_indices)

        return self.sasa_total

[docs]    def getResidueHydropathy(self, residue, sidechain=False):
        """
        Returns hydropathy value for residue

        :param residue: Residue to get hydropathy value for
        :type  residue: `structure._Residue`
        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool

        :rtype: float

        """
        # Make sure we have the SASA data for the residue, it is needed for
        # hydropathy value
        res_sasa = self.sasa_data.get(residue)
        if res_sasa is None:
            res_sasa = self.getResidueSASA(residue, sidechain=sidechain)

        # Get a structure object contianing only the residue atoms
        res_st = residue.extractStructure()

        # Get the SASA for the residue without any neighboring atoms
        solo_res_sasa = sum(analyze.calculate_sasa_by_atom(res_st))

        pdb_name = residue.pdbres.strip()
        hval = HYDROPATHY_SCALE.get(pdb_name, 0.0)

        calculated_hval = (old_div(res_sasa, solo_res_sasa)) * hval

        self.hydropathy_data[residue] = calculated_hval

        return calculated_hval

[docs]    def getHydropathyGenerator(self, sidechain=False):
        """
        Returns a generator that yields the `schrodinger.structure._Residue`
        object and its calculated hydropathy for each residue in
        `self.struct`.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool
        :rtype: generator

        """
        for res in self.residues:
            calculated_hval = self.getResidueHydropathy(res,
                                                        sidechain=sidechain)
            yield (res, calculated_hval)

[docs]    def getTotalHydropathy(self, sidechain=False):
        """
        Returns the total calculated hydropathy value for all residues.

        :param sidechain: Only consider sidechain atoms when calculating SASA
        :type  sidechain: bool
        :rtype: float

        """
        if self.hydropathy_total is None:
            self.hydropathy_total = 0.0
            for res, value in self.getHydropathyGenerator(sidechain=sidechain):
                self.hydropathy_total += value

        return self.hydropathy_total

[docs]    def getResidueRotatableBonds(self, residue):
        """
        Return the number of rotors for a residue.

        :param residue: Residue to get rotor count for
        :type  residue: `structure._Residue`
        :rtype: int

        """
        # If we already have it calculated return it
        count = self.rotatable_data.get(residue)
        if count is not None:
            return count

        resname = residue.pdbres.strip()
        count = RESIDUE_ROTOR_COUNT.get(resname, 0)

        self.rotatable_data[residue] = count

        return count

[docs]    def getRotatableBondsGenerator(self):
        """
        Returns a generator that yields the `schrodinger.structure._Residue`
        object and its number of rotors for each residue in `self.struct`.

        :rtype: generator

        """
        for res in self.residues:
            count = self.getResidueRotatableBonds(res)
            yield (res, count)

[docs]    def getTotalRotatableBonds(self):
        """
        :return: Sum of rotors for all residues.
        :rtype: float

        """
        if self.rotatable_total is None:
            self.rotatable_total = 0.0
            for res, count in self.getRotatableBondsGenerator():
                self.rotatable_total += count

        return self.rotatable_total

    def _calculateVDWSurfComp(self):
        if self._lig_asl is None:
            self._vdw_surf_comp_by_atom = {}
            return

        ligand_atoms = analyze.evaluate_asl(self.struct, self._lig_asl)
        if not ligand_atoms or len(ligand_atoms) == self.struct.atom_total:
            self._vdw_surf_comp_by_atom = {}
        else:
            self._vdw_surf_comp_by_atom = surfcomp.calc_complementarity_by_atom(
                self.struct, ligand_atoms, grid_spacing=0.6)

[docs]    def getTotalSurfComp(self):
        """
        :return: Median of vdW surface complementarity values for all
                 surface points for all residues.
        :rtype: float

        """

        if self.vdw_surf_comp_total is None:
            self._calculateVDWSurfComp()

            sc_values = []
            for values in self._vdw_surf_comp_by_atom.values():
                sc_values += values

            if sc_values:
                self.vdw_surf_comp_total = numpy.median(sc_values)
            else:
                self.vdw_surf_comp_total = 0.0

        return self.vdw_surf_comp_total

[docs]    def getResidueSurfComp(self, residue):
        """
        :return: Median of vdW surface complementarity values for all
                 accounted points on the surface of this residue.
        :rtype: float

        :param residue: Residue to get the value for
        :type  residue: `structure._Residue`

        """
        if self._vdw_surf_comp_by_atom is None:
            self._calculateVDWSurfComp()
        # If we already have it calculated return it
        sc = self.vdw_surf_comp_data.get(residue)
        if sc is not None:
            return sc

        sc_values = []
        for atom in residue.atom:
            sc_values += self._vdw_surf_comp_by_atom[atom.index]
        if sc_values:
            sc = numpy.median(sc_values)
        else:
            sc = 0.0

        self.vdw_surf_comp_data[residue] = sc

        return sc

[docs]    def calculateOverResidues(self, *properties):
        """
        Helper method that returns a generator which will calculate multiple
        properties for `self.struct`. All results will be returned in a
        tuple with the form ( `structure._Residue`, calc dict ). Here
        is a list of valid properties to calculate:

           - e_pot
           - e_internal
           - e_interaction
           - pka
           - sasa_polar
           - sasa_nonpolar
           - sasa_total
           - hydropathy
           - rotatable
           - vdw_surf_comp

        :param properties: Properties to calculate
        :type  properties: str (see PropertyCalculator.RESIDUE_CALCULATIONS)

        :raise KeyError: If a property passed in is invalid
        :return: Generator that yields `structure._Residue` and dict where keys
                 are properties passed in and values are the total value of the
                 property for the protein. e.g (_Residue, {'e_pot':1324.3})
        :rtype: generator

        """
        prop_map = {
            'e_pot': self.getResiduePotentialEnergy,
            'e_internal': self.getResidueInternalEnergy,
            'e_interaction': self.getResidueInteractionEnergy,
            'pka': self.getResiduepKa,
            'sasa_polar': self.getResidueAtomicPolarSASA,
            'sasa_nonpolar': self.getResidueAtomicNonPolarSASA,
            'sasa_total': self.getResidueSASA,
            'hydropathy': self.getResidueHydropathy,
            'rotatable': self.getResidueRotatableBonds,
            'vdw_surf_comp': self.getResidueSurfComp,
        }

        # Make sure the passed in properties are valid
        for prop in properties:
            if prop not in self.RESIDUE_CALCULATIONS:
                props_str = ", ".join(self.RESIDUE_CALCULATIONS)
                msg = 'Invalid property %s. Valid properties: %s' % (prop,
                                                                     props_str)
                raise KeyError(msg)

        # Set the total number of calculations that will take place
        total_steps = len(properties) * len(self.residues)
        step = 0
        for res in self.residues:
            #DEBUG print 'Calculating %s for %s' % (', '.join(properties), res)
            calculations = {}
            for prop in properties:
                if prop == 'vdw_surf_comp' and self._lig_asl is None:
                    raise KeyError(
                        'Ligand ASL must be defined to calculate vdw_surf_comp')
                else:
                    calculations[prop] = prop_map.get(prop)(res)

                # Set the current progress
                step += 1
                self.progress = (step, total_steps)

            yield (res, calculations)

[docs]    def calculate(self, *properties):
        """
        Helper method to calculate multiple properties for `self.struct`.
        All results will be returned in a dict where the keys are each of
        the properties in `properties`, and their values are the values
        returned from their corresponding method. Here is a list of valid
        properties to calculate:

           - e_pot
           - sasa_polar
           - sasa_nonpolar
           - sasa_total
           - prime_energy
           - pka
           - hydropathy
           - rotatable
           - vdw_surf_comp

        :param properties: Properties to calculate
        :type  properties: str (see PropertyCalculator.AGGREGATE_CALCULATIONS)

        :raise KeyError: If a property passed in is invalid
        :return: Dict where keys are properties passed in and values are
                 the total value of the property for the protein.
                 e.g {'e_pot': 1324.3, 'sasa_total': 1846.9}
        :rtype: dict

        """
        prop_map = {
            'e_pot': self.getTotalPotentialEnergy,
            'prime_energy': self.getTotalPrimeEnergy,
            'pka': self.getTotalpKa,
            'sasa_polar': self.getTotalSASAPolar,
            'sasa_nonpolar': self.getTotalSASANonPolar,
            'sasa_total': self.getTotalSASA,
            'hydropathy': self.getTotalHydropathy,
            'rotatable': self.getTotalRotatableBonds,
            'vdw_surf_comp': self.getTotalSurfComp,
        }
        # Make sure the passed in properties are valid
        valid = all((p in self.AGGREGATE_CALCULATIONS) for p in properties)
        if not valid:
            props_str = ", ".join(self.AGGREGATE_CALCULATIONS)
            raise KeyError('Invalid properties supplied. Valid properties: '
                           '%s.' % props_str)

        data = {}
        for prop in properties:
            #DEBUG print 'Calculating %s...' % prop
            data[prop] = prop_map.get(prop)()

        return data

[docs]    def getTotalAggregation(self):

        return

[docs]    def getTotalSolubility(self):

        return

[docs]    def getTotalComplementarity(self):

        return


[docs]class Mutation:
    """
    Helper class for `Mutator`. This will store a mutated structure
    and the resides that were mutated.

    """

[docs]    def __init__(self, ref_struct, struct, residue_map):
        """
        :param ref_struct: Reference structure (potentially "idealized")
        :type  ref_struct: `schrodinger.structure.Structure`

        :param struct: Mutated structure
        :type  struct: `schrodinger.structure.Structure`

        :param residue_map: The mapping of reference residues to their mutated residues
        :type  residue_map: dict instance of reference and mutated
                `residues<schrodinger.structure._Residue>`

        """
        self.ref_struct = ref_struct
        self.struct = struct
        self.residue_map = residue_map


[docs]class Mutator:
    """
    Mutates a set of residues in a protein structure allowing concurrent
    mutations as well as the option to limit concurrent mutations to sequential
    residues only.

    Here is an example of a mutation of a Ser residue to: Asp, Glu, Asn, & Gln
    (one-letter codes are D, E, N, & Q respectively). The Ser residue is in
    chain A and has a residue number of 22.  This example will write a file
    named 'mutated_structures.maegz' that has the reference structure as
    the first CT and each mutation CT after that. Five total structures will
    be in the output file::

      from schrodinger import structure
      from schrodinger.application.bioluminate import protein

      # Get the input structure
      reference_st = structure.Structure.read('receptor.maegz')

      # Create the writer for the output file and append the reference
      writer = structure.StructureWriter('mutated_structures.maegz')
      writer.append(reference_st)

      # Define the residues and mutations
      residues = ['A:22']
      muts     = 'DENQ'

      # Get a compatible list of mutations. The above turns into
      # [('A', 22, 'DENQ')]
      mutations = protein.Mutator.convert_residue_list(residues, muts)

      # Construct the mutator
      mutator = protein.Mutator(st, mutations)

      # Loop over each mutation
      for mutation in mutator.generate():
          #
          mutated_structure = mutation.struct
          residue_map       = mutation.residue_map

          res_str = ", ".join(str(res) for res in residue_map.values())
          print 'Residues affected by this mutation: %s' % res_str

          # Do something with the mutated structure (refine maybe)

          writer.append(mutated_structure)


    @todo: Add logging
    """

    MUTATIONS_PROPERTY = "s_bioluminate_Mutations"
    UNFOLDED_PROPERTY = "r_bioluminate_Unfolded_Contribution"
    UNFOLDED_PROPERTY_PRIME = "r_bioluminate_Unfolded_Contribution_Prime"

    # Lookup for unfolded residue contributions where:
    #    unfolded_contribution = mut_residue - wt_residue
    #
    # *****
    # The unfolded state is represented by a Boltzmann-weighted energy of a
    # tripeptide conformational ensemble. For each tripeptide Gly-X-Gly (where X
    # is the wild type or mutant residue), Confgen was run with OPLS_2005 and
    # "water" MM-GB/SA implicit solvent to generate conformers.
    # *****
    # Units are kcal/mol.
    # The energies here should be negative
    #
    # In this the "Minimizer" list. To generate it, each conformation
    # was minimized with the Minimizer, and the endpoint energy was calculated.
    # Then the Boltzmann weighted average Prime energy for the ensemble of
    # structures was calculated, using an effective temperature of 25 degrees C.
    # These values were added as requested in BIOLUM-1317
    #
    GXG_DATA = {
        "ALA": -100.736,
        "ARG": -118.478,
        "ARN": -118.478,  # ARG +H
        "ASN": -137.153,
        "ASP": -149.255,
        "ASH": -149.255,  # ASP +H
        "CYS": -100.845,
        "GLN": -136.183,
        "GLU": -143.536,
        "GLH": -143.536,  # GLU +H
        "GLY": -105.658,
        "HIS": -104.977,
        "HIE": -104.977,
        "HID": -104.977,
        "HIP": -104.977,
        "ILE": -84.130,
        "LEU": -92.400,
        "LYS": -110.759,
        "LYN": -110.759,  # LYS -H
        "MET": -99.708,
        "PHE": -96.483,
        "PRO": -66.763,
        "SER": -96.365,
        "THR": -98.156,
        "TRP": -105.114,
        "TYR": -101.858,
        "VAL": -93.493,
    }

    #
    # In this the "Prime" list. To generate it, each conformation
    # was minimized with Prime, and the endpoint energy was calculated.
    # Then the Boltzmann weighted average Prime energy for the ensemble of
    # structures was calculated, using an effective temperature of 25 degrees C.
    # These values were added as requested in BIOLUM-1317
    #
    GXG_DATA_PRIME = {
        "ALA": -112.635,
        "ARG": -142.467,
        "ARN": -142.467,  # ARG +H
        "ASN": -156.375,
        "ASP": -154.559,
        "ASH": -154.559,  # ASP +H
        "CYS": -113.747,
        "GLN": -152.611,
        "GLU": -141.673,
        "GLH": -141.673,  # GLU +H
        "GLY": -116.263,
        "HIS": -121.600,
        "HIE": -121.600,
        "HID": -121.600,
        "HIP": -121.600,
        "ILE": -97.541,
        "LEU": -107.106,
        "LYS": -123.751,
        "LYN": -123.751,  # LYS -H
        "MET": -112.643,
        "PHE": -113.719,
        "PRO": -81.734,
        "SER": -112.693,
        "THR": -116.048,
        "TRP": -122.407,
        "TYR": -123.497,
        "VAL": -109.017,
    }

    # These are the supported residues in build.mutate
    SUPPORTED_BUILD_RESIDUES = [
        'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'HIP',
        'HIE', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP',
        'TYR', 'VAL'
    ]

    # FIXME add 'ARN', 'ASH', and 'GLH' to this list.

[docs]    def __init__(self,
                 ref_struct,
                 mutations,
                 concurrent=1,
                 sequential=False,
                 idealize=True):
        """

        :param ref_struct: The reference (starting) structure
        :type  ref_struct: `schrodinger.structure.Structure` instance

        :param  mutations: A list of the mutations to carry out on the
                `ref_struct`.  Each element of the list is a tuple of
                ("res num.", ["pdbnames"]) where "res num."
                is the residue number being altered and "pdbnames" is a list of
                the standard PDB residue names to mutate it to.
        :type mutations: List of tuples

        :param concurrent: Maximum concurrent mutations
        :type  concurrent: int

        :param sequential: Limit concurrent mutations to being sequential
        :type  sequential: bool

        :param idealize: Whether to idealize the reference structure by self-mutating
                the affected residues before calculating properties.
        :type  idealize: bool

        :raise RuntimeError: If concurrent is not between 1 and 99.

        :see: For easy creation of mutations variable
              `Mutator.convert_residue_list`
        """

        if concurrent < 1 or concurrent > 99:
            err = 'Invalid concurrent value: %s' % concurrent
            raise RuntimeError(err)

        # NOTE: This is the input structure without any modifications. Any
        # self-mutations and minimizations will be performed later.
        self.ref_struct = ref_struct
        # IMPORTANT: This class should never be modifying this structure

        self._mutations = None

        self.mutations = mutations
        self.concurrent = concurrent
        self.sequential = sequential
        self._idealize_reference = idealize

        # Make sure sequential is False if there is no concurrent
        # mutations being made
        if self.concurrent == 1:
            self.sequential = False

        self._validate_mutations_structure()

    def _validate_mutations_structure(self):
        """
        *Instance* method for validating mutations using checks relevant
        to actual reference structure.

        This is run after generating mutations at initialization of the Mutator instance.

        Other validation methods below DO NOT check or have access to the reference structure
        since they are used as static methods

        Checks:
        1. Only C terminal residues with OXT should be allowed
        """
        accepted_mutations = []
        for mutation in self.mutations:
            chain, resnum, inscode, resname = mutation
            # C terminal residues must have 'OXT', if capped, NMA will be terminal and shouldnt be mutated
            c_term_res = [
                res for res in self.ref_struct.residue
                if res.chain == chain.replace('_', ' ')
            ][-1]
            c_term_num = c_term_res.resnum
            c_term_inscode = c_term_res.inscode
            is_c_term = c_term_num == resnum and c_term_inscode == inscode
            if is_c_term:
                has_oxt = ' OXT' in [a.pdbname for a in c_term_res.atom]
                if not has_oxt:
                    print(
                        f"\nWARNING: Refusing to mutate uncharged C terminal residue {c_term_res}. Consider leaving charged or adding a capping group.\n"
                    )
                    continue
            accepted_mutations.append(mutation)
        self.mutations = accepted_mutations

[docs]    @staticmethod
    def validate_mutated_residues(residues):
        """
        Method for validating the residues used in mutations passed in to the
        `MutateProtein` class.

        :raise ValueError: If the 3-letter residue name is not supported
                           by the `build,mutate` method.

        """
        # TODO: Convert the return to raise a custom MutateProteinError. This
        # will help in letting front-end know why it fails.
        # TODO: Add validation for assuring chain and resnum are in self.struct

        # Catch residues not supported by the build.mutate method
        invalid = []
        for res in residues:
            if res not in Mutator.SUPPORTED_BUILD_RESIDUES:
                invalid.append(res)
        if invalid:
            msg = 'Unsupported residues: %s\n\n' % ', '.join(invalid)
            msg += 'The resnames for mutations must be one of: %s' %\
                    ','.join(Mutator.SUPPORTED_BUILD_RESIDUES)
            raise ValueError(msg)

[docs]    @staticmethod
    def validate_mutations(mutations):
        """
        Private method for validating the mutations passed in to the
        `MutateProtein` class.

        :raise ValueError: If the `mutations` passed in is not a list, if each
                item in the list is not a tuple, if the tuple is not of length 4
                (chain, resnum idx, inscode, mutation resnames), if the resnum is
                not an integer, or any of the 3-letter residue names in "mutation
                resnames" is not supported by the `build,mutate` method.

        """
        # TODO: Convert the return to raise a custom MutateProteinError.
        # This will help in letting front-end know why it fails.

        # TODO: Add validation for assuring chain and resnum are in self.struct

        if not type(mutations) == list:
            raise ValueError('Mutations must be a list')

        for mutation in mutations:
            if not type(mutation) == tuple:
                raise ValueError('Each mutations must be a tuple')
            if not len(mutation) == 4:
                raise ValueError('Each mutation must define chain, resnum, '
                                 'inscode, and resname')

            chain, idx, inscode, resname = mutation
            try:
                idx = int(idx)
            except ValueError:
                raise ValueError('The resnum must be an integer')

            # Catch residues not supported by the build.mutate method
            # This has to be disabled to support unnatural amino acids
            # Mutator.validate_mutated_residues( [resname] )
            # Dev: Todo, validate_mutated_residues should support unnaturals

    @property
    def mutations(self):
        """ The list of mutations that will be carried out """

        return self._mutations

    @mutations.setter
    def mutations(self, mutations):
        """
        The setter method for `self.mutations`.

        :raise RuntimeError: If the mutations have an invalid syntax

        """
        Mutator.validate_mutations(mutations)

        self._mutations = mutations

    @property
    def total_mutations(self):
        """
        Total number of mutations that will be generated
        """

        mutations_list = self.calculateMutationsList()
        return len(mutations_list)

[docs]    @classmethod
    def convert_res_file(cls, filename, regex=MUTATION_3_LETTER_RE):
        """
        Converts lines in filename into a list of mutations to use. Returns a list
        of tuples where each tuple is ( "chain", "resnum", "inscode",
        "three-letter resnames for mutation").

        Each line could be multiple mutations (one residue to multiple mutation states)

        :param filename: Name of file containing the list of mutations.
        :type  filename: str

        :param regex: Regular expression for matching residues
        :type regex: regular expression object

        :raise RuntimeError: If any of chain, resnum or mutation is missing

        :returns: List of mutations with valid syntax for the class
        :rtype: list of tuples
        """
        mutations = []
        with open(filename) as fileobj:
            for i, line in enumerate(fileobj, start=1):
                # Skip blank lines
                if not line.strip():
                    continue
                # Skip commented lines
                if line.startswith('#'):
                    continue
                new_muts = cls.convert_res_to_muts(line, regex, validate=False)
                if new_muts is None:
                    raise RuntimeError(
                        'Invalid syntax for mutation in %s: %s (line %s)' %
                        (filename, line, i))

                mutations.extend(new_muts)
        Mutator.validate_mutations(mutations)
        return mutations

[docs]    @classmethod
    def convert_res_list(cls,
                         reslist,
                         regex=MUTATION_3_LETTER_RE,
                         validate=True):
        """
        Converts list of residues into a list of mutations to use. Returns a list
        of tuples where each tuple is ( "chain", "resnum", "inscode",
        "three-letter resnames for mutation").

        Each residue string could be multiple mutations (one residue to
        multiple mutation states)

        :param reslist: List of residues to convert to mutations
        :type  reslist: list of str

        :param regex: Regular expression for matching residues
        :type regex: regular expression object

        :param validate: Whether to validate the potential mutations
        :type validate: bool

        :return: List of mutations with valid syntax for the class
                 or None if any item in the list is not valid
        :rtype: list of tuples or None
        """
        mutations = []
        for res_str in reslist:
            new_muts = cls.convert_res_to_muts(res_str, regex, validate=False)
            if new_muts is None:
                return None
            mutations.extend(new_muts)
        if validate:
            try:
                Mutator.validate_mutations(mutations)
            except ValueError:
                return None
        return mutations

[docs]    @staticmethod
    def convert_res_to_muts(res_str, regex=MUTATION_3_LETTER_RE, validate=True):
        """
        Converts a residue string into a list of mutations to use. Returns a
        list of tuples of ("chain", "resnum", "inscode",
        "three-letter resnames for mutation"). Will return None if any item
        in the list is not a valid residue string.

        A residue string could be multiple mutations (one residue to
        multiple mutation states)

        :param res_str: Residue string to convert to mutations
        :type  res_str: str

        :param regex: Regular expression for matching residues
        :type regex: regular expression object

        :param validate: Whether to run validation on the mutation
        :type validate: bool

        :return: List of mutations with valid syntax for the class or None if
                 the res_str is not valid.
        :rtype: list tuples or None
        """
        mutations = []
        match = regex.search(res_str)
        if not match:
            return None
        chain = match.group('chain')
        resnum = match.group('resnum')
        inscode = match.group('inscode') or ' '
        mut_types = match.group('mutations')
        #DEBUG print 'Getting mutations from file', chain, resnum, mut_types
        # If this is a 1 letter mutation we change the 1 letter to 3
        if regex == MUTATION_1_LETTER_RE:
            if mut_types:
                mut_types = [AA_1_LETTER_MAP[m] for m in mut_types]
        else:
            # split them by comma
            if mut_types:
                mut_types = [m for m in mut_types.split(',')]
        # resnum could be 0, converted to integer later. BIOLUM-2963
        if not chain or not resnum or not inscode or not mut_types:
            return None
        for resname in mut_types:
            mutation = (chain, int(resnum), inscode, resname)
            mutations.append(mutation)
        if validate:
            # Validate the parsed mutations now
            Mutator.validate_mutations(mutations)
        return mutations

[docs]    @staticmethod
    def convert_muts_file(muts_file, regex=MUTATION_RE):
        """
        Converts lines in filename into a list of mutations to use. Returns a list
        of tuples where each tuple is ( "chain", "resnum", "inscode",
        "three-letter resnames for mutation").

        Also supports loop insertion and deletion.

        Each line is one mutation (could be multiple residues)

        """
        mutations_list = []
        with open(muts_file) as fh:
            for line in fh:
                change_strings = line.strip().split()
                if len(change_strings) == 0:
                    continue
                # Skip commented lines
                if line.startswith('#'):
                    continue

                changes = []
                for change_str in change_strings:
                    match = regex.search(change_str)
                    if match:
                        chain = match.group('chain')
                        resnum = match.group('resnum')
                        inscode = match.group('inscode') or ' '
                        new_resname = match.group('new_resname')
                        # convert to 3-letter name if it is 1-letter code
                        if len(new_resname) == 1:
                            if new_resname in AA_1_LETTER_MAP:
                                new_resname = AA_1_LETTER_MAP[new_resname]
                            else:
                                raise RuntimeError("Invalid mutation: %s" %
                                                   change_str)
                        changes.append(
                            (chain, int(resnum), inscode, new_resname))
                    else:
                        raise RuntimeError("Invalid mutation: %s" % change_str)
                mutations_list.append(changes)

        if not mutations_list:
            raise RuntimeError("No mutations specified in the file")

        return mutations_list

[docs]    @staticmethod
    def convert_residue_list(residues, mutations, regex=MUTATION_3_LETTER_RE):
        """
        Convert a list of residues and mutations to a standard list of
        mutations.  Returns a list of tuples where each tuple is ( "chain",
        "resnum", "inscode", "three-letter resnames for mutation").

        :param residues: Residues that will be mutated.
        :type  residues: list of strings (Syntax: <chain>:<resnum> if no chain
            use ``"_"``)
        :param mutations: The three-letter names for the residues that will be
            used in mutation.

        :raise RuntimeError: If any of chain, resnum or mutation is missing
            or if there is an invalid residue name
        :returns: List of mutations with valid syntax for the class
        :rtype: list of tuples


        """
        mutations_list = []
        for res in residues:

            chain = None
            resnum = None

            match = regex.search(res)
            if match:
                chain = match.group('chain')
                resnum = match.group('resnum')
                inscode = match.group('inscode') or ' '

                #DEBUG print 'Getting mutations from list', chain, resnum

                for resname in mutations:
                    mutation = (chain, int(resnum), inscode, resname)
                    mutations_list.append(mutation)
            else:
                # Raise an error for non-blank lines that don't match regex
                raise RuntimeError('Invalid syntax for residue: %s' % res)

        # Validate the parsed mutations now
        Mutator.validate_mutations(mutations_list)

        return mutations_list

[docs]    def calculateMutationsList(self):
        """
        Calculate the mutations that will be performed, based on the
        input residues and their mutations, and the "concurrent"
        and "sequential" settings.
        """

        def filter_out_duplicate_residues(res_combos):

            # Set of (chain, resnum, inscode)
            seen_residues = set()
            filtered_res_combos = []

            for combo in res_combos:
                chain, resnum, inscode, res_type = combo
                res_id = (chain, resnum, inscode)
                if res_id not in seen_residues:
                    filtered_res_combos.append(combo)
                    seen_residues.add(res_id)

            return filtered_res_combos

        def are_sequential(mutations):
            """
            Determine if given mutation residues are sequential. Assumes that residues
            are given in sequence order.
            """

            for i in range(len(mutations) - 1):
                mut1 = mutations[i]
                mut2 = mutations[i + 1]

                atom1 = find_residue_atom(self.ref_struct, mut1[0], mut1[1],
                                          mut1[2])
                atom2 = find_residue_atom(self.ref_struct, mut2[0], mut2[1],
                                          mut2[2])
                if not mm.mmct_res_connected(self.ref_struct.handle,
                                             atom1.index, atom2.index):
                    # These 2 residues are not sequential
                    return False

            # All residues are sequential
            return True

        # Loop over all combinations of mutations of length self.concurrent
        all_combinations = []
        for num_concurrent_muts in range(1, self.concurrent + 1):
            # The above range loop is necessary to fix BIOLUM-122
            for res_combos in itertools.combinations(self.mutations,
                                                     num_concurrent_muts):

                # BIOLUM-1450 Filter out duplicate per-residue mutatons:
                filtered_res_combos = filter_out_duplicate_residues(res_combos)
                if len(filtered_res_combos) != len(res_combos):
                    # We must be doing this mutation already
                    continue

                # Make sure all mutations are concurrent for combination
                if self.sequential:
                    altered_residues = [c[:-1] for c in res_combos]
                    if not are_sequential(altered_residues):
                        continue
                all_combinations.append(res_combos)

        mutations_list = []

        # Iterate over all possible mutation combinations:
        for res_combos in all_combinations:
            # Sort the alteration list (for uniqueness comparison to work):
            mut_alterations = sorted(res_combos)

            if mut_alterations not in mutations_list:
                # Make sure we are not adding any duplicates (really needed?)
                mutations_list.append(mut_alterations)

        return mutations_list

[docs]    def generate(self):
        """
        Used to loop over all mutations. Each mutation consists of the mutated
        structure and a residue mapping dict. The structure is raw, that is,
        unrefined in any way.

        :returns: Generator for all mutations defined in `self.mutations` Each
                  step of generator yields a `mutation<Mutation>`.
        :rtype: generator

        """

        # Make a list of mutations:
        mutations_list = self.calculateMutationsList()

        # For each final mutation (can be one or more residue change):
        for i, changes in enumerate(mutations_list, start=1):
            # Each iteration of this loop represents a MUTATION. Each mutation
            # can consist or one or more residue changes.
            yield self.getMutationFromChanges(changes)

[docs]    def getMutationFromChanges(self, changes):

        mutation_map = OrderedDict()
        mutation_strings = []

        idealized_ref_struct = self.ref_struct.copy()

        # Idealize the reference structure (mutate every changed residue to itself):
        if self._idealize_reference:
            for chain, resnum, inscode, new_resname in changes:
                # Get the residue that is going to be mutated
                residue = idealized_ref_struct.findResidue('%s:%s' %
                                                           (chain, resnum))
                # FIXME no inscode here????

                curr_resname = residue.pdbres.strip()

                if curr_resname in Mutator.SUPPORTED_BUILD_RESIDUES:
                    res_atom = residue.getAtomIndices()[0]
                    # Mutate the residue to itself
                    build.mutate(idealized_ref_struct, res_atom, curr_resname)
                else:
                    print(
                        "WARNING: Residue %s (%s) could not be idealized, because it's a non-standard residue"
                        % (str(residue), curr_resname))

        mutated_st = self.ref_struct.copy()

        unfolded_contribution_gas = 0.0
        unfolded_contribution_prime = 0.0

        # Apply each residue change:
        for chain, resnum, inscode, new_resname in changes:
            # Find an atom from this residue in the CT:
            residue_atom = find_residue_atom(mutated_st, chain, resnum, inscode)

            orig_resname = residue_atom.pdbres.strip().upper()

            res_str = "%s:%i%s" % (chain, resnum, inscode)

            # Do the actual mutation
            if new_resname.startswith('+') or new_resname.startswith('-'):
                (mutated_st,
                 changed_seq) = self.getLoopMutation(mutated_st, res_str,
                                                     new_resname)
            else:
                renum_map = build.mutate(mutated_st, residue_atom.index,
                                         new_resname)

            # Get the reference residue that we are mutating
            reference_atom = find_residue_atom(idealized_ref_struct, chain,
                                               resnum, inscode)
            reference_residue = reference_atom.getResidue()

            # Get the mutated residue
            mutated_residue = residue_atom.getResidue()

            # Map the reference residue to the mutated residue
            mutation_map[reference_residue] = mutated_residue

            # NOTE: This string is being parsed by residue_scanning_viewer_gui.py:
            mut_str = "%s(%s->%s)" % (str(mutated_residue), orig_resname,
                                      new_resname)

            mutation_strings.append(mut_str)

            # Update the unfolded contribution for this mutant
            # TODO: for now set 0.0 for nonstandard residues
            # BIOLUM-2554, loop ins/del needs correction for peptide bond
            PEP_POT = -106.5
            PEP_PRIME = -30.1
            if new_resname.startswith('+'):
                dG1 = 0.0
                dG2 = 0.0
                for i, c in enumerate(changed_seq):
                    # FIXME: Ideally need need to get the 3-letter code of the
                    # actual residue - as multiple residues types can have the
                    # same 1-letter code.
                    resname = AA_1_LETTER_MAP[c]
                    dG1 += (self.GXG_DATA[resname] - self.GXG_DATA['GLY'] +
                            PEP_POT)
                    dG2 += (self.GXG_DATA_PRIME[resname] -
                            self.GXG_DATA_PRIME['GLY'] + PEP_PRIME)
            elif new_resname.startswith('-'):
                dG1 = 0.0
                dG2 = 0.0
                for i, c in enumerate(changed_seq):
                    # FIXME: Ideally need need to get the 3-letter code of the
                    # actual residue - as multiple residues types can have the
                    # same 1-letter code.
                    resname = AA_1_LETTER_MAP[c]
                    dG1 -= (self.GXG_DATA[resname] - self.GXG_DATA['GLY'] +
                            PEP_POT)
                    dG2 -= (self.GXG_DATA_PRIME[resname] -
                            self.GXG_DATA_PRIME['GLY'] + PEP_PRIME)
            else:
                try:
                    dG1 = (self.GXG_DATA[new_resname] -
                           self.GXG_DATA[orig_resname])
                    dG2 = (self.GXG_DATA_PRIME[new_resname] -
                           self.GXG_DATA_PRIME[orig_resname])
                except:
                    dG1 = 0.0
                    dG2 = 0.0
            unfolded_contribution_gas += dG1
            unfolded_contribution_prime += dG2

        # Comma-separated list of: "H:19(ASN->ALA)"
        # NOTE: These strings are being parsed by residue_scanning_viewer_gui.py:
        mutated_st.property[self.MUTATIONS_PROPERTY] = ",".join(
            mutation_strings)
        mutated_st.property[self.UNFOLDED_PROPERTY] = unfolded_contribution_gas
        mutated_st.property[
            self.UNFOLDED_PROPERTY_PRIME] = unfolded_contribution_prime

        mutation = Mutation(idealized_ref_struct, mutated_st, mutation_map)

        return mutation

[docs]    def getLoopMutation(self, mutated_st, res_str, new_resname):
        """
        build loop insertion or deletion
        """
        settings = {}
        settings['NUM_OUTPUT_STRUCT'] = 1
        settings['EXPAND_LOOP'] = 2
        settings['QUIET'] = 'yes'
        os.environ['PSP_EXEC'] = hunt('psp')

        resid_list = []
        resname_list = []  # List of 3-letter PDB names for all residues
        for res in mutated_st.residue:
            resid_list.append(str(res))
            resname_list.append(res.pdbres.replace(' ', ''))
        anchor_resid = res_str.replace(' ', '')
        ianchor = resid_list.index(anchor_resid)
        anchor_seq = AA_3_LETTER_MAP[resname_list[ianchor]]
        if new_resname.startswith('+'):
            # A:52 +GLY will do loop between A:51,A:54 with seq "D(52)GLYR(53)"
            istart = ianchor - 1
            start_resid = resid_list[istart]
            iend = ianchor + 2
            end_resid = resid_list[iend]
            anchor_next_seq = AA_3_LETTER_MAP[resname_list[ianchor + 1]]
            build_seq = anchor_seq + new_resname[
                1:] + anchor_next_seq  # actual building sequence
            changed_seq = new_resname[1:]
        elif new_resname.startswith('-'):
            # A:52 -2 will do loop between A:51,A:56 with seq "R(52)P(55)", 53,54 are deleted
            del_length = int(new_resname[1:])
            iend = ianchor + 2 + del_length
            end_resid = resid_list[iend]
            istart = ianchor - 1
            start_resid = resid_list[istart]
            build_seq = anchor_seq + AA_3_LETTER_MAP[resname_list[iend - 1]]
            changed_seq = ''
            for index in range(ianchor + 1, ianchor + 1 + del_length):
                changed_seq += AA_3_LETTER_MAP[resname_list[index]]

        chain = start_resid[0:1]
        stemresnum1 = int(start_resid[2:])
        stemresnum2 = int(end_resid[2:])
        loops = []
        loops.append([chain, stemresnum1, stemresnum2, build_seq])
        MLB = MultiLoopBuilder(mutated_st, loops, settings)
        if MLB.models:
            return (MLB.models[0], changed_seq)
        else:
            err = 'Cannot build insertion: %s %s' % (res_str, build_seq)
            raise RuntimeError(err)


[docs]class Refiner:
    """
    Creates input files and runs calculations for protein refinement jobs using
    Prime and our `schrodinger.structutils.minimize.Minimizer` class.

    Here is an example of how to refine a protein that just had a residue
    mutated. In this example only the residues within 7.0 angstroms of
    the mutated residue will be refined::

      from schrodinger.structure import StructureReader
      from schrodinger.structutils import build
      from schrodinger.application.bioluminate import protein

      # Get the structure
      st = StructureReader('receptor.maegz')

      # Atom number 30 is the alpha carbon of a GLU
      ca = st.atom[30]

      # Mutate GLU -> ASP
      renum_map = build.mutate(st, ca.index, "ASP")

      # Get the residue that was mutated
      mutated_residue = None
      for res in st.residue:
          ca_keys  = (ca.chain,  ca.resnum,  ca.inscode)
          res_keys = (res.chain, res.resnum, res.inscode)
          if ca_keys == res_keys:
              mutated_residue = res
              break

      # We want to use the reference to gather the residues to refine
      refine_residues = protein.get_residues_within(
          st,
          [mutated_residue],
          within = 7.0
      )

      # Create the refiner
      refiner = protein.Refiner(st, residues=refine_residues)

      # Run Prime minimization which returns the refined structure
      refined_struct = refiner.runPrimeMinimization('my_refinement_jobname')


    """
    # Available refinement jobs
    PYTHON_MINIMIZE = 'python_minimize'
    PRIME_MINIMIZE = 'prime_minimize'
    PRIME_RESIDUE = 'prime_residue'
    PRIME_SIDECHAIN = 'prime_sidechain'
    PRIME_SIDECHAIN_CBETA = 'prime_sidechain_cbeta'
    PRIME_SIDECHAIN_BB = 'prime_sidechain_bb'
    PRIME_LOOP_PRED = 'prime_loop_prediction'  # can set up multiloop jobs too
    PRIME_ANTIB_LOOP_PRED = 'prime_antibody_loop_prediction'

    # FIXME Add a <cleanup> argument (defaulting to True) and implement automatic cleanup.
[docs]    def __init__(self, struct, residues=None):
        """
        :param struct: The structure being refined
        :type  struct: `schrodinger.structure.Structure`

        :param residues: Residues to consider for refinement
        :type  residues: None or list/tuple of `structure.structure._Residue`
        """
        self.struct = struct
        self.residues = residues
        self.jobdir = self._createJobDir()

        self.python_types = [self.PYTHON_MINIMIZE]
        self.prime_types = [
            self.PRIME_MINIMIZE, self.PRIME_RESIDUE, self.PRIME_SIDECHAIN,
            self.PRIME_SIDECHAIN_CBETA, self.PRIME_SIDECHAIN_BB,
            self.PRIME_LOOP_PRED, self.PRIME_ANTIB_LOOP_PRED
        ]

[docs]    def setResidues(self, residues):
        """
        Set the residues to refine.  This is a list of integers refering to the
        residue indices for the structure.

        """
        self.residues = residues

    def _createJobDir(self):
        """
        Private method to create a directory to run the refinement jobs from.
        The name is "refinement_job_<n>" where n is either 1 or the next
        increment of a previously found directory. For exampe if there is a
        directory named "refinement_job_5" the new directory will be
        "refinement_job_6".

        """
        dirs = [d for d in os.listdir(os.getcwd()) if os.path.isdir(d)]
        count = 0
        for d in dirs:
            match = re.search(r'refinement_job_(\d+)', d)
            if match:
                _count = int(match.group(1))
                if _count > count:
                    count = _count
        retries = 5
        while True:
            temp_dir = 'refinement_job_%d' % (count + 1)
            try:
                os.mkdir(temp_dir)
                break
            except OSError as err:
                if err.errno != errno.EEXIST:
                    raise
                count += 1
                retries -= 1
                if retries < 1:
                    raise

        return os.path.abspath(temp_dir)

[docs]    def clean(self):
        """ Remove all files created from the refinement job  """

        try:
            rmtree(self.jobdir)
        except Exception as err:
            print('Unable to remove the directory %s\n%s' % (self.jobdir, err))

[docs]    def writePrimeInput(self, refine_type, input_file, st_filename, **kwargs):
        """
        Writes the input file for a Prime refinement job.

        :param refine_type: The type of Prime refinement to run (see class variables)
        :type  refine_type: str
        :param  input_file: Name of the input file for the refinement job
        :type   input_file: str
        :param st_filename: Filename of the structure to be refined
        :type  st_filename: str

        :raise RuntimeError: If `refine_type` is not supported
        :rtype: None

        """
        if refine_type not in self.prime_types:
            raise RuntimeError(
                '"%s" is not a supported Prime refinement job. Available '
                'options:\n%s' % '\n  '.join(self.prime_types))

        # Remove the host kwarg, it is added at the end of this method
        host = kwargs.pop('host', None)

        # MultiRefine loop jobs may need to define max_jobs
        if host:
            try:
                hostlist = jobcontrol.host_str_to_list(host)
                nproc = hostlist[0][1]
                if nproc:
                    kwargs['max_jobs'] = int(nproc)  # must be lower case!
            except (ValueError, IndexError) as err:
                print('Unable to get the number of processes!', err)

        st_file = os.path.basename(st_filename)
        input_obj = PrimeConfig(st_file, **kwargs)

        # Remove some kwarg as they are in the defaults and not expected by method()
        sgb_mod = kwargs.pop('SGB_MOD', None)
        use_membrane = kwargs.pop('USE_MEMBRANE', None)

        method_map = {
            self.PRIME_MINIMIZE: input_obj.prepMinimize,
            self.PRIME_RESIDUE: input_obj.prepResidue,
            self.PRIME_SIDECHAIN: input_obj.prepSidechain,
            self.PRIME_SIDECHAIN_CBETA: input_obj.prepSidechainCBeta,
            self.PRIME_SIDECHAIN_BB: input_obj.prepSidechainBB,
            self.PRIME_LOOP_PRED: input_obj.prepLoop,
            self.PRIME_ANTIB_LOOP_PRED: input_obj.prepAntibodyLoop
        }

        method = method_map.get(refine_type)
        if method is None:
            raise ValueError('Invalid refinement: %s' % refine_type)
        elif self.residues:
            method(residues=self.residues, **kwargs)
        else:
            method(**kwargs)

        # Add the compute host to the input file here
        if host:
            input_obj['HOST'] = host

        input_obj.write(input_file)

[docs]    def refinePrime(self,
                    refine_type,
                    jobname,
                    completed_callback=None,
                    **kwargs):
        """
        Run a Prime refinement job through job control and return the refined
        output structure.

        :param refine_type: The type of Prime refinement to run (see class variables)
        :type  refine_type: str

        :param jobname: Jobname to use
        :type  jobname: str

        :param completed_callback: Whether to start the job and wait, or call
            given function with Job object is parameter on completion.
        :type completed_callback: callable

        :raise RuntimeError: If `refine_type` is not supported
        :raise RuntimeError: If launching the refinement job fails
        :raise RuntimeError: If the refinement job fails
        :returns: Refined structure
        :rtype: `schrodinger.structure.Structure` object or
            `schrodinger.job.jobcontrol.Job`

        """
        # TODO: Add this to a log
        #DEBUG if self.residues is None:
        #DEBUG print 'Adding "%s" refinement job for residues: ALL' % refine_type
        #DEBUG else:
        #DEBUG print 'Adding "%s" refinement job for residues: %s' % \
        #DEBUG ( refine_type, ', '.join(str(r) for r in self.residues))

        # Write out the structure and inp files for the refinement job
        struct_file = os.path.join(self.jobdir, '%s.maegz' % jobname)
        output_file = os.path.join(self.jobdir, '%s-out.maegz' % jobname)
        input_file = os.path.join(self.jobdir, '%s.inp' % jobname)
        log_file = os.path.join(self.jobdir, '%s.log' % jobname)

        self.struct.write(struct_file)
        self.writePrimeInput(refine_type, input_file, struct_file, **kwargs)

        command = [
            "prime",
            os.path.basename(input_file),  # Crashes with abs paths
        ]

        # Add the -HOST if given
        host = kwargs.get('host', None)

        # We ignore host=localhost since this causes issues with Prime.
        # See Ev:133176.
        if host and host != 'localhost':
            command += ['-HOST', host]

        if completed_callback:
            from schrodinger.job import jobhandler
            job = jobhandler.launch_job_with_callback(command,
                                                      completed_callback,
                                                      launch_dir=self.jobdir)
            return job
        else:
            job = jobcontrol.launch_job(command, launch_dir=self.jobdir)
            job.wait()
            if not job.succeeded():
                errmsg = 'Job failed.\nJobname: %s\nJobId: %s\n' % (jobname,
                                                                    job.job_id)
                lastlines = []
                with open(os.path.join(self.jobdir, log_file)) as fh:
                    alines = fh.readlines()
                    lastlines = alines[-30:]
                if lastlines:
                    errmsg += 'Below are the last 30 lines of the log file...\n'
                    errmsg += ''.join(lastlines)
                raise RuntimeError(errmsg)
            if job.StructureOutputFile:
                output = os.path.join(self.jobdir, job.StructureOutputFile)
            else:
                # multirefine prime jobs do not have job.StructureOutputFile
                output = output_file
            if not os.path.isfile(output):
                raise RuntimeError(
                    'Job has no output\n\nJobname: %s\nJobId: %s' %
                    (jobname, job.job_id))
            return structure.Structure.read(output)

[docs]    def runPrimeMinimization(self, jobname):
        """
        Shortcut to run a Prime minimization job

        :see: `Refiner.refinePrime` documentation

        """
        return self.refinePrime(self.PRIME_MINIMIZE, jobname)

[docs]    def runPrimeResidue(self, jobname):
        """
        Shortcut to run a Prime residue refinement job

        :see: `Refiner.refinePrime` documentation

        """
        return self.refinePrime(self.PRIME_RESIDUE, jobname)

[docs]    def runPrimeSidechain(self, jobname):
        """
        Shortcut to run a Prime sidechain refinement job

        :see: `Refiner.refinePrime` documentation

        """
        return self.refinePrime(self.PRIME_SIDECHAIN, jobname)

[docs]    def runPrimeSidechainCBeta(self, jobname):
        """
        Shortcut to run a Prime sidechain refinement job with CA-CB vector
        sampling. This will vary the orientation of the CA-CB bond by up
        to 30 degrees from the initial direction.

        :see: `Refiner.refinePrime` documentation

        """
        return self.refinePrime(self.PRIME_SIDECHAIN_CBETA, jobname)

[docs]    def runPrimeSidechainBB(self, jobname):
        """
        Shortcut to run a Prime sidechain refinement job with backbone sampling.
        This will sample the backbone by running a loop prediction on a set of
        3 residues centered on the residue for which the side chain is being
        refined.

        :see: `Refiner.refinePrime` documentation

        """
        return self.refinePrime(self.PRIME_SIDECHAIN_BB, jobname)

[docs]    def runPrimeLoopPrediction(self, jobname, start_res=None, end_res=None):
        """
        Shortcut to run a Prime loop prediction refinement job..

        :see: `Refiner.refinePrime` documentation

        """
        return self.refinePrime(self.PRIME_LOOP_PRED,
                                jobname,
                                start_res=start_res,
                                end_res=end_res)

[docs]    def runPythonMinimize(self, jobname):
        """
        Shortcut to run a `schrodinger.structutils.minimize.Minimizer`
        job.

        :param jobname: Jobname to use
        :type  jobname: str

        :returns: Minimized structure
        :rtype: `schrodinger.structure.Structure` object

        """
        minimizer = minimize.Minimizer(
            struct=self.struct, ffld_version=minimize.OPLS_2005,
            cleanup=False)  # SHARED-5335, BIOLUM-3602
        # Auto fixing of lewis causes charged cysteines
        # turning cleanup=False to restore old behavior

        minimize_indices = []
        # Get the atoms that will be minimized.
        if self.residues is None:
            minimize_indices.extend([a.index for a in self.struct.atom])
        else:
            for res in self.residues:
                minimize_indices.extend([a.index for a in res.atom])

        # Freeze all atoms we don't want minimized
        for atom in self.struct.atom:
            if atom.index not in minimize_indices:
                minimizer.addPosFrozen(atom.index)

        minimizer.minimize()

        return minimizer.getStructure()

[docs]    def runRefinement(self, refine_type, jobname, **kwargs):
        """
        Shortcut to run any of the available refinement jobs.

        :param refine_type: The type of Prime refinement to run (see class variables)
        :type  refine_type: str

        :param jobname: Jobname to use
        :type  jobname: str

        :raise RuntimeError: If `refine_type` is not supported
        :raise RuntimeError: If the refinement job fails
        :returns: Refined structure
        :rtype: `schrodinger.structure.Structure` object

        """
        if refine_type in self.prime_types:
            return self.refinePrime(refine_type, jobname, **kwargs)
        elif refine_type == self.PYTHON_MINIMIZE:
            return self.runPythonMinimize(jobname)


[docs]class Consensus:
    """
    Access the atoms, residues, and molecules (or just their indices) that
    are considered to be consensus objects for a template structure and
    query structure. All properties are returned as an `OrderedDict`
    that maps the template objects to their consensus objects from the
    query structure.

    Here is an example of how to get all the consensus waters between
    two protein structures. We define the cutoff here at 2 Angstroms::

      from schrodinger.structure import StructureReader
      from schrodinger.application.bioluminate import protein

      pt = maestro.project_table_get()

      # Create an ASL map for all ligands in the WS
      asl_map = []
      for row in pt.included_rows:
          st = row.getStructure()
          ligands = analyze.find_ligands(st)
          if not ligands:
              continue
          indices = []
          for ligand in ligands:
              indices.extend([str(i) for i in ligand.atom_indexes])

          asl = 'atom.n %s' % ','.join(indices)

          asl_map.append((st, asl))

      # Create a consensus of all ligands, specifying that at least three
      # structures must have a ligand atom within 2A from one another.
      consensus = protein.Consensus(asl_map, 3, dist_cutoff=2)

      # To get the atom objects
      consensus_atoms = consensus.atoms

      # To get the molecule objects
      molecules = consensus.molecules


    """
    ASL_WATER = 'water and NOT (atom.ele H)'
    ASL_WATER_NOZOB = 'water and NOT (atom.ele H) and NOT (withinbonds 1 (not water))'
    ASL_IONS = 'ions'
    ASL_LIGAND = '(((m.atoms 5-130)) and not ((ions) or '
    ASL_LIGAND += '(res.pt ACE ACT ACY BCT BME BOG CAC CIT CO3 DMS EDO EGL EPE '
    ASL_LIGAND += 'FES FMT FS3 FS4 GOL HEC HED HEM IOD IPA MES MO6 MPD MYR NAG '
    ASL_LIGAND += 'NCO NH2 NH3 NO3 PG4 PO4 POP SEO SO4 SPD SPM SUC SUL TRS )))'

[docs]    def __init__(self, asl_map, minimum_number, dist_cutoff=2.0):
        """
        :param asl_map: List of structures and the ASL used to limit the atoms
                used when calculating the consensus
        :type asl_map: tuple of (`structure<structure.Structure>`, ASL)

        :param minimum_number: The minimum number of matches within structures.
                An atom will be considered a "consensus" atom if it is within the
                `dist_cutoff` of at least `minimum_number` of structures in the
                list of passed in structures.
        :type  minimum_number: int

        :param dist_cutoff: Distance in Angstroms used to define a consensus match
        :type  dist_cutoff: float

        :attention: The list of consensus atoms (or molecules, residues, indices,
            etc. depending on the property called, i.e. `self.molecules`) will all
            be unique and will depend on the ASL passed in. If the ASL is not specific
            enough you may end up with poor results.

        """
        self.asl_map = asl_map
        self.number_cutoff = minimum_number
        self.dist_cutoff = dist_cutoff

        self._consensus_atoms = []
        """ Atom indices mapping template and query consensus """

        self._setIndexMap()

        self._atom_indices = None

    def _setIndexMap(self):
        """
        Private method called in __init__. Finds the consensus atom indices
        between the template and query.

        """
        # Get a mapping of structures to atom indices for the given asl
        #asl_atom_map = {}
        atom_list = []
        for st, asl in self.asl_map:
            indices = analyze.evaluate_asl(st, asl)
            atom_list.append([st.atom[i] for i in indices])

        # Prepare the container for all the consensus atoms.
        # This list is in the same order as the passed in structures.
        consensus_map = {}

        ref_count = 0
        for ref_atoms in atom_list:
            # For each atom matching the asl provided we loop through all the
            # other structures and see if there is a "consensus atom" among them
            for ref_atom in ref_atoms:
                # Track the total consensus atoms to make sure we meet cutoff
                consensus_total = 0

                close_atoms = {}

                mob_count = 0
                for mob_atoms in atom_list:
                    if ref_count > mob_count:
                        closest_atom = self.getClosest(ref_atom, mob_atoms)
                        if closest_atom:
                            consensus_total += 1
                            close_atoms.setdefault(mob_count,
                                                   []).append(closest_atom)

                    mob_count += 1

                # Only add atoms to the consensus map of they meet the cutoff
                if consensus_total >= self.number_cutoff:
                    consensus_map.setdefault(ref_count, []).append(ref_atom)
                    for k, v in close_atoms.items():
                        consensus_map.setdefault(k, []).extend(v)

            ref_count += 1

        # Add each set of atoms to the self._consensus_atoms so they match
        # the list of structures supplied in the init
        for i in range(len(self.asl_map)):
            self._consensus_atoms.append(consensus_map.get(i, []))

[docs]    def getClosest(self, ref_atom, mob_atoms):
        """
        Gets the closest atom to the `ref_atom` from `mob_atoms`.

        """
        dist = float(self.dist_cutoff)
        closest_atom = None
        # Find the closest query atom
        for mob_atom in mob_atoms:
            new_dist = measure.measure_distance(ref_atom, mob_atom)
            if new_dist < dist:
                dist = new_dist
                closest_atom = mob_atom

        return closest_atom

    def _createByAtomProperty(self, attr, is_callable=False):
        """
        Private method to get an atom property.

        :param attr: The attribute to get from the consensus atoms
        :type  attr: str
        :param is_callable: Whether the prop arg is a callable
        :type  is_callable: bool

        :returns: A list of lists of the new entities created from the properties
        :rtype: list of lists

        """
        entity_list = []
        for atom_list in self._consensus_atoms:
            entities = set()
            for atom in atom_list:
                if hasattr(atom, attr):
                    atom_attr = getattr(atom, attr)
                    if is_callable:
                        entities.add(atom_attr())
                    else:
                        entities.add(atom_attr)

            entity_list.append(list(entities))

        return entity_list

    @property
    def atoms(self):
        """
        Get the map of `atom objects<structure._StructureAtom>` of consensus
        atoms.

        :returns: Atoms of consensus atoms
        :rtype: `OrderedDict` of `atom objects<structure._StructureAtom>`
                where the keys are the template atoms and their values are
                the consensus atoms from the query.

        """
        return self._consensus_atoms

    @property
    def atom_indices(self):
        """
        Get the map of atom indices of consensus atoms.

        :returns: Atom indices of consensus atoms
        :rtype: `OrderedDict` of ints where the keys are the template atom
                indices and their values are the consensus atom indices from
                the query.

        """
        if not self._atom_indices:
            self._atom_indices = []
            for atoms in self.consensus_atoms:
                self._atom_indices.append([a.index for a in atoms])

        return self._atom_indices

    @property
    def residues(self):
        """
        Get the list of `residue objects<structure._Residue>` of consensus
        atoms for each structure in `self.asl_map`.

        :returns: Residues of consensus atoms
        :rtype: list of unique consensus `residue objects<structure._Residue>`
                for each structure in `self.asl_map`. (Order is maintained)

        """
        residues = []
        for res_list in self._createByAtomProperty('getResidue', True):
            residue_names = []
            unique_residues = []
            for res in res_list:
                if not str(res) in residue_names:
                    unique_residues.append(res)
                residue_names.append(res)
            residues.append(unique_residues)

        return residues

    @property
    def residue_indices(self):
        """
        Get the map of residue indices of consensus atoms.

        :returns: Residue indices of consensus atoms
        :rtype: list of unique consensus residue indices for each structure in
                `self.asl_map`. (Order is maintained)

        """
        return self._createByAtomProperty('resnum')

    @property
    def molecules(self):
        """
        Get the map of `molecule objects<structure._Molecule>` of consensus
        atoms.

        :returns: Molecules of consensus atoms
        :rtype: list of unique consensus `molecule objects<structure._Molecule>`
                for each structure in `self.asl_map`. (Order is maintained)

        """
        molecules = []
        for mol_list in self._createByAtomProperty('getMolecule', True):
            molecule_names = []
            unique_molecules = []
            for mol in mol_list:
                if not str(mol) in molecule_names:
                    unique_molecules.append(mol)
                molecule_names.append(mol)
            molecules.append(unique_molecules)

        return molecules

    @property
    def molecule_indices(self):
        """
        Get the map of molecule indices of consensus atoms.

        :returns: Molecule indices of consensus atoms
        :rtype: list of unique consensus molecule indices for each
                structure in `self.asl_map`. (Order is maintained)

        """
        return self._createByAtomProperty('molecule_number')