Source code for schrodinger.application.prepwizard

"""
Shared functionality between PrepWizard GUI and command-line PrepWizard.

Copyright Schrodinger, LLC. All rights reserved.

"""

import base64
import itertools
import json
import zlib

import schrodinger
from schrodinger.application.prepwizard2.prepare import calc_state_score
from schrodinger.infra import mm
from schrodinger.protein import biounit
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils.interactions import hbond

maestro = schrodinger.get_maestro()

# Maximum number of bonds that can be made to an atom (by element):
maxvalence = [
    0, 1, 1, 1, 2, 6, 4, 4, 2, 1, 1, 1, 2, 8, 8, 8, 8, 1, 1, 1, 2, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 1, 1, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 8, 1, 1, 1, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9
]

# List of PDB residue names to NOT run Epik on:
EPIK_EXCLUSION_LIST = ['HEM', 'HEC']

STATE_PANELTY_PROP = 'r_epik_State_Penalty'
METAL_PENALTY_PROP = 'r_epik_Metal_State_Penalty'
ORIG_ANUM_PROP = 'i_ppw_anum'
HET_NUM_PROP = 'i_ppw_het'
HET_STATES_PROP = 's_ppw_het_states'

PDBNAME_ELEMENT_DICT = {}
for pdbname in [' OD1', ' OE1', ' OD2', ' OE2']:
    PDBNAME_ELEMENT_DICT[pdbname] = "O"
for pdbname in [' ND1', ' ND2', ' NE2']:
    PDBNAME_ELEMENT_DICT[pdbname] = "N"
for pdbname in [' CG ', ' CD ', ' CD2', ' CE1']:
    PDBNAME_ELEMENT_DICT[pdbname] = "C"

# Dictionary that defines possible ionization states for metals:
# Key is the atomic number.
metal_charges = {
    11: [1],  # NA Sodium  +1
    12: [2],  # MG Magnesium +2
    19: [1],  # K  Potassium +1
    20: [2],  # CA Calcium +2
    25: [2, 3, 4, 5, 6, 7],  # MN Manganese +2, +3, +4, +5, +6, +7
    26: [2, 3],  # FE Iron +2 +3
    27: [2, 3],  # CO Cobalt +2 +3
    28: [2, 3],  # NI Nickel +2 +3
    29: [1, 2, 3],  # CU Copper +1 +2 +3
    30: [2],  # ZN Zinc +2
    #   48:[2], # CD Cadmium +2 No MacroModel parameters
    #   80:[1, 2]  HG Mercury +1 +2  No MacroModel parameters
}


def _count_bonds(a):
    """
    Count number of bonds to the given StructureAtom (double bonds = 2 bonds),
    zero-order bonds are not counted.
    """
    total_orders = 0
    for bond in a.bond:
        order = bond.order
        total_orders += order
    return total_orders


[docs]def serialize_het_states(states_by_het, complex_st):
    """
    Set JSON string for all given states as a CT-level property.
    """
    # Keys are het nums (as str, as required by JSON module), values are
    # lists of hets. Het number is included so that restoring is possible
    # even if some hets were later removed by the user (i_ppw_het
    # property would remain unchanged).
    states_by_het = {
        str(hetnum): [s.serialize() for s in states
                     ] for hetnum, states in states_by_het.items()
    }

    json_str = json.dumps(states_by_het)
    compressed_str = zlib.compress(bytes(json_str, 'utf-8'))
    encoded_str = base64.b64encode(compressed_str).decode('utf-8')
    complex_st.property[HET_STATES_PROP] = encoded_str


[docs]def deserialize_het_states(complex_st):
    """
    Attempt to read the het states from the CT using the s_ppw_het_states
    property, if present.
    """
    encoded_str = complex_st.property.get(HET_STATES_PROP)
    if not encoded_str:
        # No states to load
        return []

    compressed_str = base64.b64decode(encoded_str)
    json_str = zlib.decompress(compressed_str)
    states_dict = json.loads(json_str)

    # In JSON, all keys are str, to cast to int:
    all_hets_and_states = {int(hetnum): v for hetnum, v in states_dict.items()}

    return all_hets_and_states


[docs]class HetState:
    """
    Class representing an ionization/tautomeric state of a het (e.g. ligand).
    """

[docs]    def __init__(self, hetnum):
        self.hetnum = hetnum  # Het number (value of i_ppw_het properties)
        self.charges = None  # items=(original anum, formal charge)
        self.orders = None  # items=(orig anum1, orig anum2, order)
        self.penalty = None  # Epik state penalty; 0.0 for metals
        self.metal_penalties = None  # keys=orig anum, values=metal penalty
        self.score = None  # Score of the het state, in context of protein
        self.info_str = None  # Information string explaining state details,
        # including num hbonds to receptor.

[docs]    @classmethod
    def initFromEpikOutput(cls, hetnum, state_st):
        """
        Return a HetState instance for the given Epik output structure.
        """
        self = cls(hetnum)
        self.charges, self.orders = self.extractStateChargesAndOrders(state_st)

        state_penalty, metal_penalties = get_state_penalties(state_st)

        self.penalty = state_penalty  # Will be 0.0 for metals
        self.metal_penalties = metal_penalties
        # NOTE: score and info_str will be calculated when state is applied
        # to the complex CT
        return self

[docs]    @classmethod
    def initFromSerializedState(cls, hetnum, serialized_state):
        """
        Return a HetState instance for the given serialized state.
        """
        self = cls(hetnum)
        (self.charges, self.orders, self.penalty, self.metal_penalties,
         self.score, self.info_str) = serialized_state
        # Convert atom number keys back to ints. This is needed because all
        # keys are loaded as strings when reading JSON:
        self.metal_penalties = {
            int(anum): penalty
            for anum, penalty in self.metal_penalties.items()
        }
        return self

[docs]    def serialize(self):
        # JSON requires keys to be strings:
        return (self.charges, self.orders, self.penalty, self.metal_penalties,
                self.score, self.info_str)

[docs]    def extractStateChargesAndOrders(self, state_st):
        hetnum = self.hetnum

        # Dict of charge information for each atom in the output state.
        # Keys are original/input atom indices:
        charge_data = []
        for atom in state_st.atom:
            # Iterate over all atoms in this Epik output CT:
            anum = atom.property.get(ORIG_ANUM_PROP)

            # Skip atoms that were not in the original het. This includes the
            # protein atoms covalently bound to the het, and hydrogens that
            # were added by Epik.
            if atom.property.get(HET_NUM_PROP) != hetnum:
                continue

            # If we got here then we are processing a heavy atom that is part
            # of the het - so it must have i_ppw_anum set, as Epik doesn't
            # touch heavy atoms.
            assert anum, "ERROR: No i_ppw_anum property for atom"
            charge_data.append((anum, atom.formal_charge))

        order_data = []
        for bond in state_st.bond:
            atom1 = bond.atom1
            atom2 = bond.atom2
            if atom1.property.get(HET_NUM_PROP) != hetnum or atom2.property.get(
                    HET_NUM_PROP) != hetnum:
                # Hydrogen or covalently bound protein atom:
                continue

            anum1 = atom1.property.get(ORIG_ANUM_PROP)
            anum2 = atom2.property.get(ORIG_ANUM_PROP)
            assert anum1 is not None and anum2 is not None

            order_data.append((anum1, anum2, bond.order))

        return charge_data, order_data

[docs]    def applyState(self, st):
        """
        Apply this het state to the specified complex structure: modify
        the bond orders and formal charges to apply the currently selected
        het state.

        Also calculates the score and info_str properties

        Raises RuntimeError if any of the het atoms are no longer in the
        structure, or are not numbered correctly.
        """

        # Delete hydrogen neighbors before adding them according to next state:
        het_atom_indices = self.findHetAtoms(st)
        hydrogens = hydrogen_neighbors(st, het_atom_indices)
        st.deleteAtoms(hydrogens)

        # Need to find het atoms again, as it's possible that removing
        # hydrogens has altered atom ordering:
        het_atom_indices = self.findHetAtoms(st)

        # Keys are original atom indices (prior to pre-processing), values are
        # atom objects, for hets that are still present in the structure.
        atoms_by_anum = {}
        for anum in het_atom_indices:
            atom = st.atom[anum]
            # Iterate over het heavy atoms in the Workspace CT:
            # This atom's atom number in input (unprepared) complex CT:
            anum = atom.property.get(ORIG_ANUM_PROP)
            assert anum is not None
            atoms_by_anum[anum] = atom

        expecting_anums = {anum for (anum, _) in self.charges}
        for anum1, anum2, _ in self.orders:
            expecting_anums.add(anum1)
            expecting_anums.add(anum2)
        missing_atoms = expecting_anums.difference(atoms_by_anum.keys())
        if missing_atoms:
            raise RuntimeError(
                'Could not apply het state (%i atoms missing or renumbered)' %
                len(missing_atoms))

        # Change the bond orders and charges as necessary:
        for anum, charge in self.charges:
            atom = atoms_by_anum[anum]
            atom.formal_charge = charge
            if charge > 0:
                charge_str = '+%i' % charge
            elif charge < 0:
                charge_str = '%i' % charge
            else:
                charge_str = ''
            atom.label_user_text = str(charge_str)
            atom.label_format = "%UT"
            atom.label_color = 13

        for anum1, anum2, order in self.orders:
            atom1 = atoms_by_anum[anum1]
            atom2 = atoms_by_anum[anum2]
            bond = st.getBond(atom1, atom2)
            bond.order = order

        num_atoms_pre_htreat = st.atom_total
        # Add hydrogens according to the new bond orders and formal charges:
        st.retype()
        build.add_hydrogens(st, atom_list=het_atom_indices)

        # Hide the new hydrogens (so that they are not flickered when selecting
        # state):
        for anum in range(num_atoms_pre_htreat + 1, st.atom_total):
            st.atom[anum].visible = False

        # Calculate the total score for this state. The score may have changed
        # since the states were generated by Epik due to user's changes - for
        # example removing an interacting metal or water molecule.
        self.score, self.info_str = calc_state_score(st, het_atom_indices,
                                                     self.penalty,
                                                     self.metal_penalties)

[docs]    def findHetAtoms(self, complex_st):
        """
        Return a list of het heavy atoms in the given complex structure.
        """
        asl = "(atom.%s %i)" % (HET_NUM_PROP, self.hetnum)
        return analyze.evaluate_asl(complex_st, asl)


class _CommonProblemsFixer:
    """
    Class for fixing common structure problems.
    """

    def _hasZobs(self, atom):
        """
        Return True if any of the bonds of this atom are zero-order.
        """
        for bond in atom.bond:
            if bond.order == 0:
                return True
        return False

    def _setFormalCharge(self, atom, new_charge):
        """
        Set the formal charge for the given atom. Logs the change in the
        self._corrections_list.
        """
        prev_formal_charge = atom.formal_charge
        if prev_formal_charge == new_charge:
            return
        atom.formal_charge = new_charge
        msg = "Changed atom %i (%s) charge from %i to %i" % (
            atom.index, atom.element, prev_formal_charge, new_charge)
        self._corrections_list.append(msg)

    def _deprotCarbAcid(self, carbon):
        """
        De-protonate the specified carboxylic acid.

        :type carbon: `structure._StructureAtom` object
        :param carbon: The carbon of the carboxylic acid.
        """
        # NOTE: This is no longer needed due to CONV-855/CONV-860 fix

        oxygen_double = None
        oxygen_single = None
        for bond in carbon.bond:
            atom2 = bond.atom2
            if atom2.element == "O":
                if bond.order == 2:
                    oxygen_double = atom2
                else:
                    oxygen_single = atom2
        if not oxygen_double or not oxygen_single:
            # If this ever happens, then this is not a carboxylic acid, or the
            # residue has missing atoms. Skip this group.
            return
        self._setFormalCharge(oxygen_single, -1)

        # Delete the hydrogen bound to the oxygen (if any):
        for other_atom in oxygen_single.bonded_atoms:
            if other_atom.element == "H":
                self._atoms_to_delete.append(other_atom.index)
                correction = (
                    "Deleted hydrogen %i" % other_atom.index +
                    " because it was interfering with a zero-order bond")
                self._corrections_list.append(correction)

    def _fixElement(self, atom):
        """
        If an atom's element (Whether it's an oxygen, nitrogen, or carbon) does
        not correspond to its PDB name, it gets reset. (Ev:69207)
        """
        expected_element = PDBNAME_ELEMENT_DICT.get(atom.pdbname)
        if expected_element:
            prev_element = atom.element
            if prev_element != expected_element:
                correction = "Changed atom %i (%s) element from %s to %s" % (
                    atom.index, atom.pdbname, prev_element, expected_element)
                atom.element = expected_element
                self._corrections_list.append(correction)

    def _processSulfur(self, atom):
        """
        Ev:105338 Fix formal charges on sulfurs
        """
        all_neighbors_are_carbons = True
        total_orders = 0
        for bond in atom.bond:
            order = bond.order
            total_orders += order
            if bond.atom2.element != "C":
                all_neighbors_are_carbons = False
        if total_orders == 3 and all_neighbors_are_carbons:
            prev_formal_charge = atom.formal_charge
            if prev_formal_charge != 1:
                self._setFormalCharge(atom, +1)

    def _processBackboneOxygen(self, atom):
        """
        PPREP-875 Remove extra bonds when residue oxygens are bound to covalent
        ligands
        """
        # R-O=R will become R-O-R
        if _count_bonds(atom) >= 3 and atom.formal_charge <= 0:
            # This oxygen is making one too many bonds. Remove one.
            # We ignore R-O(+)=R, which is valid.
            if len(atom.bond) != 2:
                return
                # For now we handle only the case where one bond is single
                # and one bond is double.
            # Find the double bond and make it single:
            for bond in atom.bond:
                if bond.order >= 2:
                    bond.order = 1
                    res = str(bond.atom1.getResidue())
                    correction = "Modified bond %i-%i (%s): Set order to 1" % \
                        (bond.atom1, bond.atom2, res)
                    self._corrections_list.append(correction)
                    break

    def run(self, st):
        """
        Fix common structure problems in the given Structure.

        :rtype: list of strings
        :return: A list of texts describing what changes were made.
        """
        self._atoms_to_delete = []
        self._corrections_list = []

        for atom in st.atom:
            self._fixElement(atom)

            if atom.element == "S":
                self._processSulfur(atom)

            # Ev:108985
            elif atom.element == "B":
                if _count_bonds(atom) == 4 and atom.formal_charge == 0:
                    self._setFormalCharge(atom, -1)

            # Ev:120900 Delete hydrogens with more than one non-ZOB bond (PDB error)
            elif atom.element == "H":
                num_bonds = 0
                for bond in atom.bond:
                    if bond.order > 0:
                        num_bonds += 1
                if num_bonds > 1:
                    self._atoms_to_delete.append(atom.index)
                    correction = "Deleted atom %i: This hydrogen had invalid number of bonds (%i)" % (
                        atom.index, num_bonds)
                    self._corrections_list.append(correction)

            elif atom.pdbname == " O  ":
                self._processBackboneOxygen(atom)

            elif atom.pdbname in (" OD1", " OD2"):
                # PPREP-1092 Find carboxylic acids with ZOBs
                # Use pdbnames instead of elements as an optimization (since
                # fewer atoms will have these names, while there are many Os).
                # NOTE: This is no longer needed due to CONV-855/CONV-860 fix
                if self._hasZobs(atom):
                    for bond in atom.bond:
                        if bond.order != 0 and bond.atom2.element == "C":
                            self._deprotCarbAcid(bond.atom2)

            # PPREP-943 Fix formal charges on some irons:
            # (FIXME perhaps make this correction more broad?)
            elif atom.element == "Fe":
                if atom.pdbres == "SRM " and atom.pdbname == "FE  ":
                    self._setFormalCharge(atom, +2)

        if self._atoms_to_delete:
            st.deleteAtoms(self._atoms_to_delete)

        st.retype()
        return self._corrections_list


[docs]def fix_common_structure_mistakes(st):
    """
    Fixes common problems in structures and returns a list of correction strings
    that are to be reported to the user.
    """

    return _CommonProblemsFixer().run(st)


[docs]def check_st_valences(st):
    """
    Will raise a RuntimeError if any atom in the specified structure
    has more bonds than is allowed for that element.
    Het groups in the PDB sometimes have this issue.
    """

    messages = []
    for a in st.atom:
        total_orders = _count_bonds(a)

        if total_orders > maxvalence[a.atomic_number]:
            msg = "WARNING: Assign Bond Orders: Atom %i (%s) has too many bonds (%i)" \
                % (a.index, a.element, total_orders)
            messages.append(msg)

    if messages:
        raise RuntimeError("\n".join(messages))


[docs]def atomsasl(atoms):
    """ Generates an ASL expression for the specified atoms. """
    if len(atoms) == 0:
        raise Exception("ERROR: empty atom list passed to atomsasl()!")
    asl = "(atom.num %s)" % ', '.join(map(str, atoms))
    return asl


def _create_bonds_by_smarts(st,
                            bond_type,
                            smarts1,
                            smarts2,
                            dist,
                            verbose=False):
    """
    General function to create bonds between the first atom in two
    different smarts patterns if they are closer then a distance. The smarts
    matching is performed on a per residue basis.
    :param st: Structure to modify.
    :type st: Schrodinger.structure

    :param bond_type: Bond type to be used for reporting
    :type bond_type: String

    :param smarts1: The first smarts pattern to consider, by convention
                    this should be the protein if that applies
    :type bond_type: String

    :param smarts2: The second smarts pattern to consider, by convention
                    this should be the non-protein part if that applies
    :type bond_type: String

    :param dist: Atoms must be at least this close to consider for
       for glycosilation
    :type dist: float

    :param verbose: Whether to print formed bonds to stdout
    :type verbose: boolean

    :rparam: Pairs of atom ( in the output structure ) where
       bonds were added
    :rtype: list of schrodinger atom objects
    """
    output = []
    attach_list = [[], []]
    for res in st.residue:
        res_anums = res.getAtomIndices()
        res_ct = res.extractStructure()
        for smarts, attach in zip([smarts1, smarts2], attach_list):
            match = analyze.evaluate_smarts_canvas(res_ct, smarts)
            # Map residue atom ids back to main structure
            for a_match in match:
                anum_by_st = res_anums[a_match[0] - 1]
                attach.append(anum_by_st)
    smarts1_attach, smarts2_attach = attach_list
    if not smarts1_attach or not smarts2_attach:
        return output

    atoms_to_delete = []
    atoms_to_add_hyd = []
    used_atoms = set()
    atoms = smarts1_attach + smarts2_attach
    for iatom1, iatom2 in measure.get_close_atoms(st, dist, atoms=atoms):
        atom1 = st.atom[iatom1]
        atom2 = st.atom[iatom2]
        # Don't reuse atoms
        if atom1.index in used_atoms or atom2.index in used_atoms:
            continue
        # Identify close atoms that are not bound
        if st.areBound(atom1, atom2):
            continue
        # Check both ways
        for s1_atom, s2_atom in itertools.permutations([atom1, atom2]):
            if s1_atom.index not in smarts1_attach:
                continue
            if s2_atom.index not in smarts2_attach:
                continue
            st.addBond(s1_atom, s2_atom, 1)
            output.append((s1_atom, s2_atom))
            used_atoms.add(s1_atom.index)
            used_atoms.add(s2_atom.index)
            # Keep track of the hydrogens to delete on each side
            # and which atoms to add hydrogens to.  This will all be
            # done at the end to avoid messing up indexes
            for atom in s1_atom, s2_atom:
                for ba in atom.bonded_atoms:
                    if ba.element == "H":
                        atoms_to_delete.append(ba.index)
                        atoms_to_add_hyd.append(atom.index)
            if verbose:
                print('   Created a %s bond between %s and %s' %
                      (bond_type, "%s:%d%s(%s):%s" %
                       (s1_atom.chain, s1_atom.resnum, s1_atom.inscode,
                        s1_atom.pdbres, s1_atom.pdbname), "%s:%d%s(%s):%s" %
                       (s2_atom.chain, s2_atom.resnum, s2_atom.inscode,
                        s2_atom.pdbres, s2_atom.pdbname)))

    # Note that since we are using structure Atom objects for output,
    # the indexes autoamtically update as the underlying CT is changed
    if len(atoms_to_delete) > 0:
        rmap = st.deleteAtoms(atoms_to_delete, renumber_map=True)
        atoms_to_add_hyd = [rmap[i] for i in atoms_to_add_hyd]
    if len(atoms_to_add_hyd) > 0:
        build.add_hydrogens(st, atom_list=atoms_to_add_hyd)
    return output


[docs]def create_glycosylation_bonds(st, dist=1.8, verbose=True):
    """
    Create glycosylation bonds for N-linked or O-linked glycosilation events
    Identfies neutral O or N with implicit or explicit hydrogens and
    forms bonds to sugars ( ring with 5 aliphatic carbons and one oxygen )
    at locations adjacent to the oxygen.
    :param st: Structure to modify.
    :type st: Schrodinger.structure

    :param dist: Atoms must be at least this close to consider for
       for glycosilation
    :type dist: float

    :param verbose: Whether to print formed bonds to stdout
    :type verbose: boolean

    :rparam: Pairs of atom ( in the output structure ) where
       bonds were added
    :rtype: list of schrodinger atom objects
    """
    # ( note that only NH1 is checked for for ARG as NH2 is positively
    # charged.
    return _create_bonds_by_smarts(
        st,
        "glycosylation",
        "[N,O;+0;h,H1,H2]",
        "[C;r6;H2,H1,h][O;r6][C;r6][C;r6][C;r6][C;r6]",
        dist,
        verbose=verbose)


[docs]def create_palmitoylation_bonds(st, dist=1.8, verbose=True):
    """
    Create palmitoylation bonds.
    Identfies neutral cysteine S with implicit or explicit hydrogens and
    palmitoyl groups or palmitoyl groups with the OH of the acid replace by
    a hydrogen
    :param st: Structure to modify.
    :type st: Schrodinger.structure

    :param dist: Atoms must be at least this close to consider for
       for pamitoylation
    :type dist: float

    :param verbose: Whether to print formed bonds to stdout
    :type verbose: boolean

    :rparam: Pairs of atom ( in the output structure ) where
       bonds were added
    :rtype: list of schrodinger atom objects
    """
    return _create_bonds_by_smarts(st,
                                   "palmitoylation",
                                   "[S,O;+0;h,H1,H2]",
                                   "[C;+0;h,H1](=[O;+0])",
                                   dist,
                                   verbose=verbose)


[docs]def create_disulfide_bonds(st, dist=3.2, verbose=False):
    """
    Create bonds between proximal Sulfurs, deleting any hydrogens on them.
    If verbose is True, prints log info to the termnal.
    Returns a list of (atom1, atom2) for ever added bond.
    """

    cys_sulfurs = analyze.evaluate_asl(
        st,
        '((res.ptype "CYS ") OR (res.ptype "CYX ")) AND ((atom.ptype " SG "))')

    added_bonds = []  # Ev:92646
    atoms_to_delete = []
    for isulfur_index in cys_sulfurs:
        for jsulfur_index in cys_sulfurs:
            if jsulfur_index >= isulfur_index:
                continue

            isulfur = st.atom[isulfur_index]
            jsulfur = st.atom[jsulfur_index]

            # Ev:129699 Skip this pair if at least one is already bound to
            # something (perhaps from a previous iteration of this loop)
            pair_hydrogens = []
            skip = False
            for a in [isulfur, jsulfur]:
                num_heavy = 0
                for bond in a.bond:
                    if bond.order > 0:  # Ev:131161 Ignore zero-order bonds
                        n = bond.atom2
                        if n.element == 'H':
                            pair_hydrogens.append(n.index)
                        else:
                            num_heavy += 1
                if num_heavy > 1:
                    skip = True
            if skip:
                continue

            if st.measure(isulfur, jsulfur) <= dist:
                isulfur.addBond(jsulfur, 1)
                added_bonds.append((isulfur.index, jsulfur.index))
                isulfur.formal_charge = 0
                jsulfur.formal_charge = 0
                atoms_to_delete += pair_hydrogens

                ires = isulfur.getResidue()
                jres = jsulfur.getResidue()

                # Ev:88748 Change the resname of the combined residue to CYX:
                if ires.pdbres != "CYX " or jres.pdbres != "CYX ":
                    ires.pdbres = "CYX "
                    jres.pdbres = "CYX "
                    if verbose:
                        print(
                            '   Created a bond between %s and %s; changed residue names to "CYX "'
                            % (ires, jres))
                else:
                    if verbose:
                        print('   Created a bond between %s and %s' %
                              (ires, jres))

    # Must delete atoms at the end, in order to preserve atom numbers:
    st.deleteAtoms(atoms_to_delete)

    st.retype()

    return added_bonds


[docs]def convert_selenomethionines(st):
    """
    Convert MSE residues to METs.
    Returns a list of residue strings that were converted.
    """

    converted_residues = []
    for res in st.residue:
        if res.pdbres == 'MSE ':
            # Convert this Selenomethionine to a Methionine:
            res_str = str(res)
            converted_residues.append(res_str)
            for atom in res.atom:
                if atom.pdbname == 'SE  ':
                    atom.element = 'S'
                    atom.pdbname = ' SD '
                    atom.color = 'yellow'
            res.pdbres = 'MET'

    st.retype()

    return converted_residues


def _set_oxygen_orders(st, mainatom, double, single, charge):
    """
    Sets the orders of phosphate-oxygen or sulfate-oxygen bonds.

    mainatom - atom number of the phosphate or sulfur

    double - list of atom numbers of the oxygens to double-bond to

    single - list of atom numbers of oxygens to single-bond to, and set charge to <charge>.

    """

    st = st.copy()
    for anum in double:
        bond = st.getBond(mainatom, anum)
        bond.order = 2
        st.atom[anum].formal_charge = 0

    for anum in single:
        bond = st.getBond(mainatom, anum)
        bond.order = 1
        st.atom[anum].formal_charge = charge

    return st


def _generate_phosphate_states(st, atoms):
    """
    Returns a list of structures (expanded states)
    """
    # TODO move this code into the GenerateStatesTask class

    phosphate = atoms[0]
    double_oxygen = atoms[1]

    # Make a list of negative hydrogens and hydrogens with hydrogens:
    negative_oxygens = []
    hydrogen_oxygens = []

    # Iterate over all oxygens with single bond to Phosphate:
    for anum in atoms[2:]:
        if st.atom[anum].bond_total == 1:
            if st.atom[anum].formal_charge == -1:
                negative_oxygens.append(anum)
            else:
                hydrogen_oxygens.append(anum)

    # FIXME For now, do not deal with this Phosphate if it has any hydrogen
    # oxygens:
    if hydrogen_oxygens:
        return [st]

    if not negative_oxygens:
        # No states can be generated from this Phosphate
        return [st]

    # Alternate double bond between the oxygens:
    if len(negative_oxygens) == 1:
        st2 = _set_oxygen_orders(st, phosphate, [negative_oxygens[0]],
                                 [double_oxygen], -1)
        return [st, st2]
    elif len(negative_oxygens) == 2:
        st2 = _set_oxygen_orders(st, phosphate, [negative_oxygens[0]],
                                 [double_oxygen, negative_oxygens[1]], -1)
        st3 = _set_oxygen_orders(st, phosphate, [negative_oxygens[1]],
                                 [double_oxygen, negative_oxygens[0]], -1)
        return [st, st2, st3]
    else:
        print('ERROR: wrong number of negative oxygens:', len(negative_oxygens))
        return [st]


def _generate_sulfate_states(st, atoms):
    """
    Returns a list of structures (expanded states)
    """
    # TODO move this code into the GenerateStatesTask class

    sulfur = atoms[0]
    double_oxygen1 = atoms[1]
    double_oxygen2 = atoms[2]

    # Make a list of negative hydrogens and hydrogens with hydrogens:
    negative_oxygens = []
    hydrogen_oxygens = []

    # Iterate over all oxygens with single bond to Sulfur:
    for anum in atoms[3:]:
        if st.atom[anum].bond_total == 1:
            if st.atom[anum].formal_charge == -1:
                negative_oxygens.append(anum)
            else:
                hydrogen_oxygens.append(anum)

    # Alternate double bond between the oxygens:
    if len(negative_oxygens) == 1 and len(hydrogen_oxygens) == 0:
        # st1 is equivalent of:              [double_oxygen1, double_oxygen2], negative_oxygens[0]
        st2 = _set_oxygen_orders(st, sulfur,
                                 [double_oxygen1, negative_oxygens[0]],
                                 [double_oxygen2], -1)
        st3 = _set_oxygen_orders(st, sulfur,
                                 [double_oxygen2, negative_oxygens[0]],
                                 [double_oxygen1], -1)
        return [st, st2, st3]

    elif len(hydrogen_oxygens) == 1 and len(negative_oxygens) == 0:
        # st1 is equivalent of:              [double_oxygen1, double_oxygen2], hydrogen_oxygens[0]
        st2 = _set_oxygen_orders(st, sulfur,
                                 [double_oxygen1, hydrogen_oxygens[0]],
                                 [double_oxygen2], 0)
        st3 = _set_oxygen_orders(st, sulfur,
                                 [double_oxygen2, hydrogen_oxygens[0]],
                                 [double_oxygen1], 0)
        return [st, st2, st3]

    else:
        # No states can be generated from this Sulfate
        return [st]


[docs]def count_phosphates_and_sulfurs(st):

    # Find any phosphates:
    p_smarts = "P(=O)(O)(O)O"
    s_smarts = "S(=O)(=O)(O)O"

    p_matches = analyze.evaluate_smarts_canvas(st, p_smarts)
    s_matches = analyze.evaluate_smarts_canvas(st, s_smarts)

    main_atoms = set()
    for atoms in p_matches + s_matches:
        main_atoms.add(atoms[0])

    return len(main_atoms)


[docs]def extend_phosphate_states(st):
    """
    For specified structure, generates phosphate states, and returns list
    of output structures. Ev:78688

    NOTE: Output structure has no hydrogens.
    """
    # TODO move this code into the GenerateStatesTask class

    hydrogens = []
    for a in st.atom:
        if a.atomic_number == 1:
            hydrogens.append(a)
    st.deleteAtoms(hydrogens)

    # Find any phosphates:
    p_smarts = "P(=O)(O)(O)O"

    matches = analyze.evaluate_smarts_canvas(st, p_smarts)

    # Remove duplicates:
    matches_no_dups = []
    sorted_matches = []
    for match in matches:
        sorted_match = sorted(match)
        if sorted_match not in sorted_matches:
            sorted_matches.append(sorted_match)
            matches_no_dups.append(match)

    sts = [st]
    for atoms in matches_no_dups:
        all_extended_sts = []
        for st in sts:
            extended_sts = _generate_phosphate_states(st, atoms)
            all_extended_sts.extend(extended_sts)
        sts = all_extended_sts

    return sts


[docs]def extend_sulfate_states(st):
    """
    For specified structure, generates sulfate states, and returns list
    of output structures. Ev:82634
    """
    # TODO move this code into the GenerateStatesTask class

    hydrogens = []
    for a in st.atom:
        if a.atomic_number == 1:
            hydrogens.append(a)
    st.deleteAtoms(hydrogens)

    # Find any sulfates in the structure:
    s_smarts = "S(=O)(=O)(O)O"
    matches = analyze.evaluate_smarts_canvas(st, s_smarts)

    # Remove duplicates:
    matches_no_dups = []
    sorted_matches = []
    for match in matches:
        sorted_match = sorted(match)
        if sorted_match not in sorted_matches:
            sorted_matches.append(sorted_match)
            matches_no_dups.append(match)

    sts = [st]
    for i, atoms in enumerate(matches_no_dups):
        # For this sulfate, extend the states:
        all_extended_sts = []
        for j, st in enumerate(sts):
            extended_sts = _generate_sulfate_states(st, atoms)
            all_extended_sts.extend(extended_sts)
        sts = all_extended_sts

    # Add hydrogens, because hydrogens were added to some atoms:
    for st in sts:
        build.add_hydrogens(st)

    return sts


[docs]def get_chain_sequences(st, remove_tails=True):
    """
    Will read the PDB sequences from the sequence block, and will return a
    dictionary (keys: chain names; values: sequence strings).

    If remove_tails is True, will chop off tails that are not existent in
    the CT, but will leave in the missing loop sections.

    Will raise RuntimeError on an error, or mmerror on mmct failure.
    """

    st = st.copy()

    # Make a list of residues from the chain sequence that are in the CT:
    used_chain_indexes = {}  # key: chain; value: list of ints
    for chain in st.chain:
        chain_used_indexes = []
        for res in chain.residue:
            index = res.atom[1].property.get('i_pdb_seqres_index')
            if index is not None:
                chain_used_indexes.append(index)
        used_chain_indexes[chain.name] = chain_used_indexes

    # Get the PDB sequence strings for each chain:
    try:
        data_handle = mm.mmct_ct_m2io_get_unrequested_handle(st)
    except mm.MmException:
        data_handle = mm.mmct_ct_get_or_open_additional_data(st, True)

    chain_seq_dict = {}

    # Ev:124587 Do not attempt to read the sequence if it's not present:
    # will be 1 in sequence data exists:
    num_seqres_blocks = mm.m2io_get_number_blocks(data_handle, "m_PDB_SEQRES")
    if num_seqres_blocks == 0:
        raise RuntimeError("Input structure had no sequence block data")

    mm.m2io_goto_block(data_handle, "m_PDB_SEQRES", 1)

    num_rows = mm.m2io_get_index_dimension(data_handle)
    for i in range(1, num_rows + 1):
        chain_strings = mm.m2io_get_string_indexed(data_handle, i,
                                                   ["s_pdb_chain_id"])
        seqres_strings = mm.m2io_get_string_indexed(data_handle, i,
                                                    ["s_pdb_SEQRES"])
        chain = chain_strings[0]
        seqres = seqres_strings[0]
        chain_seq_dict[chain] = seqres

    chain_strings = list(chain_seq_dict)
    used_chains = list(used_chain_indexes)
    for chain in used_chains:
        if chain not in chain_strings:
            msg = 'WARNING: Chain "%s" did not have a sequence record' % chain
            print(msg)

    # Modify the sequence strings by chopping off tails:
    output_sequences = {}  # key: chain name; value: sequence string
    mm.mmseq_initialize(mm.error_handler)
    for chain, seqres in chain_seq_dict.items():
        if chain not in used_chains:
            # Ev:102107 Possibly the user has deleted this chain
            continue

        # Figure out what residues in this sequence are missing in the CT:
        used_indexes = used_chain_indexes[chain]
        # used_indexes is 1-indexed

        # list of tuples: index=res index; value: (one letter code, whether it
        # is used)
        sequence_list = []
        numres = len(seqres) // 4
        for i in range(numres):
            beg = i * 4
            end = i * 4 + 4
            resname = seqres[beg:end]
            onelett = mm.mmseq_get_one_letter_res_name(resname)
            # Ev:86135 i is zero-indexed; while the list is 1-indexed:
            used = bool(i + 1 in used_indexes)
            sequence_list.append((onelett, used))

        in_sequence = ""
        for onelett, used in sequence_list:
            in_sequence += onelett

        if remove_tails:
            # Chop off the leading tail:
            while sequence_list:
                onelett, used = sequence_list[0]
                if used:
                    break
                else:
                    sequence_list.pop(0)

            # Chop off the trailing tail:
            while sequence_list:
                onelett, used = sequence_list[-1]
                if used:
                    break
                else:
                    sequence_list.pop(-1)

        if not sequence_list:
            raise RuntimeError("All of sequence for chain %s was missing" %
                               chain)

        # Convert to a sequence string (excluding tails):
        out_sequence = ""
        for onelett, used in sequence_list:
            out_sequence += onelett
        output_sequences[chain] = out_sequence

    mm.mmseq_terminate()

    return output_sequences


[docs]def write_sequences_to_fasta(pdbid, sequences_dict, fastafile):

    pdbid = pdbid.upper()

    fh = open(fastafile, 'w')
    for chain, sequence in sequences_dict.items():
        numres = len(sequence)
        line1 = ">%s_%s, %i bases,\n" % (pdbid, chain, numres)
        line2 = sequence + '\n'

        fh.write(line1)
        fh.write(line2)

    fh.close()


[docs]def does_res_have_missing_side_chains(residue):
    """
    Given a _Residue object, returns True if the residue is missing
    side-chain atoms.  If at least one backbone atom is (also) missing,
    False is returned.

    Basically only residues for which Prime missing-side-chains job can
    be run will return True.
    """

    # Ev:96128 Use a new mechanism to determine which residues have missing
    # atoms. This method does NOT rely on the i_m_pdb_convert_problem
    # property, which may be out-of-date:

    # DPB names for the residue backbone atoms:
    backbone_names = {" C  ", " CA ", " N  ", " O  "}

    # Do not show DNA/RNA residues, because we can't fill missing side-chains
    # on them:
    pdbres = residue.pdbres
    if pdbres in [
            '  A ', '  C ', '  G ', '  U ', ' DA ', ' DC ', ' DG ', ' TD '
    ]:
        return False

    if not residue.hasMissingAtoms():
        return False

    heavy_elements = []
    heavy_atom_names = set()
    for atom in residue.atom:
        if atom.atomic_number > 1:
            heavy_elements.append(atom.element)
            heavy_atom_names.add(atom.pdbname)

    # Ev:116225 Ignore residues that have been modified to -NH2 caps:
    if heavy_elements == ["N"]:
        return False

    # Ev:126307 Ignore residues that have been modified to -NH-CH3 caps:
    if sorted(heavy_elements) == ["C", "N"]:
        return False

    # PPREP-794 Ignore residues that are missing any backbone atoms:
    # (Note that this makes the two tests above unnecessary)
    if not heavy_atom_names.issuperset(backbone_names):
        return False

    # If got here, the reisude has missing side-chain atoms:

    return True


[docs]def do_any_residues_have_missing_side_chains(st):
    """
    Returns True if at least one of the residue in the given structure
    has missing side-chain atoms (backbone atoms are ignored).
    """

    for res in st.residue:
        if does_res_have_missing_side_chains(res):
            return True
    return False


[docs]def fix_sulfur_charges(st):
    """
    Post process by fixing the charge on zero-order-bonded Sulfurs
    Gives -1 or -2 charge to Sulfurs as appropriate.
    Deletes a hydrogen from Sulfurs coordinating with metals
    (Ev:61622)
    """

    hydrogens_to_delete = []
    for sulfur in st.atom:
        if sulfur.element != 'S':
            continue

        bonded_to_iron = False
        # Bonds for non-metal neighbors to this Sulfur (excluding hydrogens)
        reg_bonds = 0
        zob_bonds = 0  # How many bonds to metals
        hydrogens = []  # Hydrogen atoms bonded to this Sulfur
        for bond in sulfur.bond:
            bo = bond.order
            n = bond.atom2
            if n.element == 'Fe':
                # Set charge of Iron in FeS clusters to +2
                n.formal_charge = +2
                bonded_to_iron = True
            if bo == 0:
                zob_bonds += 1
            elif n.element == 'H':
                hydrogens.append(n)
            else:  # regular bond to non-hydrogen atom
                reg_bonds += bo

        # Number of hydrogens depends on number of regular bonds + number of zobs:
        if reg_bonds + zob_bonds == 0:
            max_hydrogens = 2
        elif reg_bonds + zob_bonds == 1:
            max_hydrogens = 1
        elif reg_bonds + zob_bonds == 2:
            max_hydrogens = 0
        else:
            # This is an unexpected hybridization state. Do not delete any
            # hydrogens (PPREP-694). Unless this sulfur is bond to an iron,
            # in which case we will still delete all hydrogens from it, and will
            # adjust the formal charge (PPREP-944).
            if bonded_to_iron:
                max_hydrogens = 0
            else:
                # Unknown hybridization state and NOT bound to an iron; skip:
                continue

        # Delete all hydrogens that are above the number allowed:
        while len(hydrogens) > max_hydrogens:
            hydrogens_to_delete.append(hydrogens.pop(0))

        kept_hydrogens = len(hydrogens)

        # The charge depends on the number of regular bonds (zobs excluded):
        if kept_hydrogens + reg_bonds == 0:
            sulfur.formal_charge = -2
        elif kept_hydrogens + reg_bonds == 1:
            sulfur.formal_charge = -1
        elif kept_hydrogens + reg_bonds == 2:
            sulfur.formal_charge = 0

    if hydrogens_to_delete:
        st.deleteAtoms(hydrogens_to_delete)

    st.retype()  # Correct the MacroModel atom types.


[docs]def prepare_for_epik(st, het_asls, app=None):
    """
    Extract the structures to run Epik on from the input complex CT.
    Atoms in the original structure will be marked with i_ppw_anum property,
    to make it possible to match Epik output with the input structure.
    """

    # Number each atom so that after Epik we know what they were:
    for atom in st.atom:
        atom.property[ORIG_ANUM_PROP] = atom.index

    # Ev:99175 Do not modify the original structure:
    st = st.copy()

    sts_to_run_epik_on = []
    sts_to_not_run_epik_on = []

    # Delete any zero-order-bonds (Ev:79718). Needed in order for Epik
    # to generate states correctly for zero-order-bonded ligands.
    for atom in st.atom:
        num_bonds = len(atom.bond)
        # Reverse order so that bond deletion does not affect the loop:
        for i in range(num_bonds, 0, -1):
            bond = atom.bond[i]
            if bond.order == 0:
                st.deleteBond(atom, bond.atom2)

    # Display an error if any of the hets are missing:
    for het_asl in het_asls:

        # Get Structure for het atoms:
        het_atoms = analyze.evaluate_asl(st, het_asl)
        hetnum = st.atom[het_atoms[0]].property[HET_NUM_PROP]
        het_only_st = st.extract(het_atoms)

        # Get Structure for het atoms and nearby atoms bonded to it:
        near_het_asl = "(withinbonds 2 (%s))" % het_asl
        near_heavyatoms = analyze.evaluate_asl(st, near_het_asl)
        near_heavyatoms_st = st.extract(near_heavyatoms)
        near_heavyatoms_st.property['i_ppw_hetnum'] = hetnum  # For records

        # Add hydrogens to the near-het atoms, so that Epik does not
        # produce errors regarding their lewis structure:
        extra_atoms_asl = "not (%s) and (not atom.ele H)" % het_asl
        extra_atoms_list = analyze.evaluate_asl(near_heavyatoms_st,
                                                extra_atoms_asl)
        build.add_hydrogens(near_heavyatoms_st, atom_list=extra_atoms_list)

        try:
            # Ev:91655
            check_st_valences(near_heavyatoms_st)
        except RuntimeError:
            msg = 'Warning: Some atoms in het %s had too many bonds. Will not generate states for this het.' % hetnum
            if app:
                app.warning(msg)
            else:
                print(msg)

            sts_to_not_run_epik_on.append(near_heavyatoms_st)
            continue

        # If het consists of only one metal, do not run Epik on it:
        if len(het_only_st.atom) == 1 and \
                het_only_st.atom[1].atomic_number in metal_charges:
            # Het contains only one metal atom
            sts_to_not_run_epik_on.append(near_heavyatoms_st)
        else:  # not just one metal"
            st_run_epik = False
            for a in het_only_st.atom:
                if not a.pdbres.strip() in EPIK_EXCLUSION_LIST:
                    st_run_epik = True
                    break

            # And all residues of het are in EPIK_EXCLUSION list, do not run
            # Epik:
            if st_run_epik:  # Non-metal non-excluded atoms are present:
                sts_to_run_epik_on.append(near_heavyatoms_st)
            else:
                sts_to_not_run_epik_on.append(near_heavyatoms_st)

    return sts_to_run_epik_on, sts_to_not_run_epik_on


[docs]def filter_undesired_states(orig_st, state_sts):
    """
    Returns a subset of state structures, which excludes metal-binding
    states for hets that are not within 5A of a metal.

    :param orig_st: Original complex structure (receptor, ligands, metals)
    :type orig_st: `structure.Structure`

    :param state_st: Epik output states to filter.
    :type state_st: Iterable of `structure.Structure`

    :return: List of filtered structures.
    :rtype: List of `structure.Structure`
    """

    filtered_states = []

    # For each het, whether it's within 5A of a metal:
    is_near_metal_dict = {}

    for state_st in state_sts:
        # Determine whether this is a metal-only state that should be skipped:
        # NOTE: If this property is not present, then Epik failed to generate
        # states for this structure - which means the output state is
        # effectively NOT "Metal only", and should be kept.
        if state_st.property.get('b_epik_Metal_Only'):
            hetnum = state_st.property['i_ppw_hetnum']
            if hetnum not in is_near_metal_dict:
                asl = "(atom.%s %i) AND (within 5 metals)" % (HET_NUM_PROP,
                                                              hetnum)
                matches = analyze.evaluate_asl(orig_st, asl)
                is_near_metal_dict[hetnum] = bool(matches)

            if not is_near_metal_dict[hetnum]:
                continue
        filtered_states.append(state_st)
    return filtered_states


[docs]def find_ppw_atom(st, anum):
    """
    Find the atom in the given structure whose i_ppw_anum property is set to
    the given value. ValueError exception is raised if such atom is not found.
    """
    matches = analyze.evaluate_asl(st, "(atom.%s %i)" % (ORIG_ANUM_PROP, anum))
    if not matches:
        raise ValueError("No atom with i_ppw_anum=%s was found" % anum)
    return st.atom[matches[0]]


[docs]def apply_state(complex_st, state_st):
    """
    Modify the het in complex_st complex (protein/ligand) structure such that
    its ionization state matches the output that we got from Epik (state_st).

    :param complex_st: Original complex structure (receptor + het)
    :type complex_st: `structure.Structure`

    :param state_st: Epik output for the het group. May include some atoms
            from the receptor if the ligand is covalently-bound.
    :type state_st: `structure.Structure`

    :return: List of atom indices in complex_st that are part of the het.
    :rtype: (dict, list)
    """

    heavy_atoms = []

    for state_atom in state_st.atom:
        if state_atom.element == 'H':
            continue  # Skip hydrogens
        state_anum = state_atom.property.get(ORIG_ANUM_PROP)
        if state_anum is None:
            # Skip protein atoms for covalently bound ligands
            continue

        complex_atom = find_ppw_atom(complex_st, state_anum)

        ahet = state_atom.property.get(HET_NUM_PROP)
        heavy_atoms.append(complex_atom.index)

        # Iterate over all bonds that this atom makes, and adjust them to
        # match Epik output:
        for bond in state_atom.bond:
            state_n_atom = bond.atom2
            if state_n_atom.element == 'H':
                continue  # Skip hydrogens
            neighbor_anum = state_n_atom.property.get(ORIG_ANUM_PROP)
            if neighbor_anum is None:
                # Skip protein atoms for covalently bound ligands
                continue

            nhet = state_n_atom.property.get(HET_NUM_PROP)

            # Skip this bond if the neighbor is NOT from the same het (it's
            # part of the protein to which the ligand is covalently bound - we
            # include some of the protein atoms in the Epik input).
            if nhet != ahet:
                continue

            # Modify the original full receptor structure:
            # Ev:96171 Determine which 2 atoms in <complex_st> correspond to this bond:
            neighbor_atom = find_ppw_atom(complex_st, neighbor_anum)
            complex_st.getBond(complex_atom, neighbor_atom).order = bond.order

        # Ev:134735 Determine which protein atom corresponds to this atom:
        complex_atom.formal_charge = state_atom.formal_charge

    # Need to re-type after changing bond orders and formal charges:
    complex_st.retype()

    # Delete and re-add het hydrogens in the complex structure:
    hydrogens_to_delete = hydrogen_neighbors(complex_st, heavy_atoms)

    renum_map = complex_st.deleteAtoms(hydrogens_to_delete, renumber_map=True)
    heavy_atoms = [renum_map[anum] for anum in heavy_atoms]

    natoms = complex_st.atom_total
    build.add_hydrogens(complex_st, atom_list=heavy_atoms)

    het_atoms = heavy_atoms + list(range(natoms + 1, complex_st.atom_total + 1))

    return het_atoms


[docs]def apply_state_and_calc_score(complex_st, state_st):
    """
    Apply the state <state_st> to <complex_st>, and return the score for the
    state in the context of the protein complex.

    :param complex_st: Original complex structure (receptor + het)
    :type complex_st: `structure.Structure`

    :param state_st: Epik output for the het group. May include some atoms
            from the receptor if the ligand is covalently-bound.
    :type state_st: `structure.Structure`

    :return: Tuple of state score, Epik penalty, and information string.
            Epik penalty will be None for metal states.
    :rtype: (float, float/None, str)
    """
    # TODO Move this to be an instance of HetState in the future
    het_atoms = apply_state(complex_st, state_st)
    state_penalty, metal_penalties = get_state_penalties(state_st)
    state_score, info_str = calc_state_score(complex_st, het_atoms,
                                             state_penalty, metal_penalties)
    return state_score, state_penalty, info_str


[docs]def get_state_penalties(state_st):
    """
    Return the Epik state penalty for this Epik output structure, as well as
    metal penalties for each atom that has the r_epik_Metal_State_Penalty
    property set.
    """
    # Metal states don't go through Epik, so don't have a penalty.
    # Setting it to zero here will effectively prevent sorting of
    # states for metal hets.
    state_penalty = state_st.property.get(STATE_PANELTY_PROP, 0.0)

    metal_penalties = {}
    for state_atom in state_st.atom:
        state_anum = state_atom.property.get(ORIG_ANUM_PROP)
        if state_anum is None:
            # This as an added hydrogen
            continue
        metal_penalty = state_atom.property.get(METAL_PENALTY_PROP)
        if metal_penalty is not None:
            metal_penalties[state_anum] = metal_penalty

    return state_penalty, metal_penalties


[docs]def get_smallest_metal_penalty(complex_st, metal_penalties):
    """
    Return the lowest metal penalty of all het atoms, considering only atoms
    that are within 3A of a receptor metal.
    """

    all_metals = analyze.evaluate_asl(complex_st, 'metals')

    smallest_metal_penalty = None
    for anum, metal_penalty in metal_penalties.items():
        try:
            complex_atom = find_ppw_atom(complex_st, anum)
        except ValueError:
            # This hydrogen was added by Epik and is not in the input structure.
            continue

        # Check if any metal within 3A exists AND this atom has a lower
        # metal penalty than previously seen atoms close to metals.
        if smallest_metal_penalty is None or metal_penalty < \
                smallest_metal_penalty:
            for metal_anum in all_metals:
                if complex_st.measure(metal_anum, complex_atom) <= 3:  # 3A
                    smallest_metal_penalty = metal_penalty
                    break
    return smallest_metal_penalty


def _generate_metal_states(st):
    """
    Ionize the metal atom in st (only one, if exists) and return a list of ionized sts
    """

    out_states = []

    # Ionize the metal:
    for atom in st.atom:
        charges = metal_charges.get(atom.atomic_number)
        if charges:
            # The original state should be the first state (Ev:65897):
            out_states.append(st)
            for ch in charges:
                if ch != atom.formal_charge:
                    stc = st.copy()
                    stc.atom[int(atom)].formal_charge = ch
                    stc.retype()
                    out_states.append(stc)
    if not out_states:
        # No metal in <st>, return the original:
        out_states.append(st)
    return out_states


[docs]def generate_special_states(sts, app=None):
    # If Epik is not run, the following is done only to the original het:

    processed_sts = []
    skipped_phosphate_hetnums = set()

    for st in sts:
        # For each input state, generate metal and phosphate states
        # Ev:78688
        hetnum = st.property['i_ppw_hetnum']

        extend_ps = True
        if count_phosphates_and_sulfurs(st) > 4:
            skipped_phosphate_hetnums.add(hetnum)
            extend_ps = False

        out_sts = []
        for st2 in _generate_metal_states(st):
            if extend_ps:
                for st3 in extend_phosphate_states(st2):
                    for st4 in extend_sulfate_states(st3):
                        out_sts.append(st4)
            else:
                out_sts.append(st2)

        for stm in out_sts:
            processed_sts.append(stm)

    if skipped_phosphate_hetnums:
        str_list = [str(hetnum) for hetnum in skipped_phosphate_hetnums]
        msg = "Will not extend phosphates and sulfates for these hets: %s\n(because there are too many of them per het)" % ", ".join(
            str_list)
        if app:
            app.warning(msg)
        else:
            print(msg)

    return processed_sts


def _find_waters(st):
    """
    Return a list of water atoms. Each item in the list is a tuple of 3 water atoms.
    """

    water_asl = 'water and (atom.ele O)'
    water_atoms = analyze.evaluate_asl(st, water_asl)

    waters_list = []

    for ai in water_atoms:
        a = st.atom[ai]

        # Make a list of the atoms of this water:
        curr_water_atoms = [ai]
        for h in a.bonded_atoms:
            if h.element == 'H':
                curr_water_atoms.append(h.index)

        if len(curr_water_atoms) != 3:
            print('    ERROR skipping water since it has %i hydrogens' %
                  (len(curr_water_atoms) - 1))

        curr_water_atoms = tuple(curr_water_atoms)

        waters_list.append(curr_water_atoms)

    return waters_list


[docs]def get_bridging_waters(st, min_hbonds=3):
    """
    Return a list of all waters in the specified structure that make at least
    <min_hbonds> number of H-bonds (H-bonds to other waters excluded).
    The list contains both oxygen and hydrogen atoms (if present).
    """

    waters_list = _find_waters(st)
    all_water_atoms = set()
    for water_mol_atoms in waters_list:
        all_water_atoms.update(water_mol_atoms)

    def _get_num_nonwater_hbonds(water_mol_atoms):
        count = 0

        for (atom1, atom2) in hbond.get_hydrogen_bonds(st, water_mol_atoms):
            anum1 = int(atom1)
            anum2 = int(atom2)
            if anum1 in water_mol_atoms:
                other_atom = anum2
            else:
                other_atom = anum1
            if other_atom not in all_water_atoms:
                count += 1
        return count

    bridging_water_atoms = []
    for water_atoms in waters_list:
        num_hbonds = _get_num_nonwater_hbonds(water_atoms)
        if num_hbonds >= min_hbonds:
            bridging_water_atoms += water_atoms

    return bridging_water_atoms


[docs]def hydrogen_neighbors(st, atoms):
    """
    Returns the list of neighbor (bonded) atoms that are hydrogens.

    :param st: Structure where atoms are from.
    :type st: `structure.Structure`

    :param atoms: List of atom indices for the heavy atoms.
    :type atoms: list of ints

    :return: List of hydrogen atom indices
    :rtype: list of ints.
    """

    hydrogens = []
    for heavy_atom in atoms:
        for bonded_atom in st.atom[heavy_atom].bonded_atoms:
            if bonded_atom.index not in hydrogens and bonded_atom.atomic_number == 1:
                hydrogens.append(bonded_atom.index)
    return hydrogens


[docs]def get_pdb_id(st):
    """
    Returns the PDB ID of the given structure. If the property does not exist or
    contains just white spaces, returns the string "unknown".
    """
    pdbid = st.property.get("s_pdb_PDB_ID")
    if not pdbid or not pdbid.strip():
        return "unknown"
    return pdbid.strip().lower()


[docs]def get_het_name(st, het_atoms):
    """
    Return the het "name" to display for the user for the given het.
    Example outputs::

        A:HEM (123a)
        A:GLY-VAL-PRO

    :param st: Structure containing the het
    :type st: `schrodinger.structure.Structure`

    :param het_atoms: Atoms from this het.
    :type het_atoms: list of ints
    """
    hetlist = []
    for ai in het_atoms:
        a = st.atom[ai]

        # Append atom's CHAIN, RESNAME, RESNUM, INSCODE:
        item = (a.chain, a.pdbres.strip(), a.resnum, a.inscode.strip())
        if item not in hetlist:
            hetlist.append(item)

    if len(hetlist) == 1:
        # If all atoms are from the same residue, use format "A:PRO (123a)"
        hetname_str = '%s:%s (%i%s)' % hetlist[0]
    else:
        # For more than one resname, do not include renums:
        chains_used = set([item[0] for item in hetlist])
        if len(chains_used) == 1:
            # Use format:  A:aaa-bbb-ccc
            het_names = []
            for item in hetlist:
                het_names.append(item[1])
            hetname_str = chains_used.pop() + ':' + '-'.join(het_names)
        else:
            # Use format:  A:aaa-B:bbb-C:ccc
            het_names = []
            for item in hetlist:
                het_names.append('%s:%s' % (item[0], item[1]))
            hetname_str = '-'.join(het_names)
    return hetname_str


[docs]def add_prepared_props(st):
    """
    Adds "prepared" and "prepared with version" properties to the given
    structure.
    """
    st.property['b_ppw_prepared'] = True
    st.property['s_ppw_prepared_with_version'] = schrodinger.get_release_name()


[docs]def generate_bio_units(st):
    """
    If the given structure has Biounit data, generate conformations for
    each biounit. If multiple bio units are defined, multiple structures
    are returned - oner per bio unit.
    If no biounit data is present, original structure is returned (in list).
    """
    biounits = biounit.biounits_from_structure(st)
    if not biounits:
        return [st]
    sts = [biounit.apply_biounit(st, bu) for bu in biounits]
    return sts