Source code for schrodinger.structutils.analyze

"""
Functions for analyzing `Structure objects<Structure>`.

`AslLigandSearcher` is a class that identifies putative ligands in a structure.
Each putative found ligand is contained in a `Ligand` instance.

There are also a number of functions for using SMARTS, ASL, and SMILES (e.g.
`evaluate_smarts_canvas` or `generate_smiles`).  Other functions return information
about a structure (i.e. `get_chiral_atoms` or `hydrogens_present`). There are
also several SASA (Solvent Accessible Surface Area) functions (i.e.
`calculate_sasa_by_atom` and `calculate_sasa_by_residue` and `calculate_sasa`).

See also the discussion in the Python API overview.

@copyright: Schrodinger, LLC. All rights reserved.
"""

from typing import Iterable
from typing import List
from typing import Optional
from typing import Set
import collections
import itertools
import math
import time
import warnings

import networkx
import numpy

import schrodinger.structutils.measure
from schrodinger import geometry
from schrodinger import get_maestro
from schrodinger import adapter
from schrodinger.infra import canvas
from schrodinger.infra import mm
from schrodinger.infra import mmbitset
from schrodinger.infra import mmerr
from schrodinger.infra import mminit
from schrodinger.infra import mmlist
from schrodinger.infra.mmbitset import Bitset
from schrodinger.structure import NO_STEREO  # noqa: F401
from schrodinger.structure import RESIDUE_MAP_3_TO_1_LETTER  # noqa: F401
from schrodinger.structure import STEREO_FROM_ANNOTATION_AND_GEOM
from schrodinger.structure import Structure
from schrodinger.structure import _Ring
from schrodinger.structure import create_new_structure
from schrodinger.structure import get_residues_by_connectivity
from schrodinger.structutils import transform
from schrodinger.structutils import minimize
from schrodinger.structutils.interactions import hbond
from schrodinger.test import ioredirect
from schrodinger.utils import log
from schrodinger.math import mathutils

smiles = None
_canvas_license = None
_canvas = None

logger = log.get_logger("schrodinger.structutils.analyze")

_initializer = mminit.Initializer(
    [
        mm.mmstereo_initialize, mm.mmpatty_initialize, mm.mmsubs_initialize,
        mm.mmasl_initialize, mm.mmhtreat_initialize, mm.mmcrystal_initialize,
        mm.mmsmiles_initialize
    ],
    [
        mm.mmstereo_terminate, mm.mmpatty_terminate, mm.mmsubs_terminate,
        mm.mmasl_terminate, mm.mmhtreat_terminate, mm.mmcrystal_terminate,
        lambda: None
    ]  # Currently terminating and then reinitializing mmsmiles
    # causes a problem, so don't terminate.
)

RDKIT_MAX_MATCHES = 1000000000  # See DESMOND-11242 and RB 65689


def _get_chiralities(stereo_handle):
    """
    Helper for get_chiral_atoms(st) and get_chiralities(st).

    :type stereo_handle: int
    :param stereo_handle: mmstereo handle for the Structure being
             worked on.

    :rtype: dict
    :return: Dictionary of chiralities keyed off atom index.

    """
    rs = {
        mm.MMSTEREO_CHIRALITY_R: "R",
        mm.MMSTEREO_CHIRALITY_S: "S",
        mm.MMSTEREO_CHIRALITY_ANR: "ANR",
        mm.MMSTEREO_CHIRALITY_ANS: "ANS",
    }  # NOTE: Any other chirality will be returned as "undef"

    (atoms, chiralities) = mm.mmstereo_stereo_atoms(stereo_handle)

    at = mmlist.mmlist(atoms)
    ch = mmlist.mmlist(chiralities)

    result = {}
    for i in range(0, len(at)):
        anum = at[i]
        chirality = ch[i]
        result[anum] = rs.get(chirality, 'undef')

    return result


[docs]def get_chiral_atoms(structure):
    """
    Return a dictionary of chiral atoms, for which the key is the atom index
    and the value is one of the following strings: "R", "S", "ANR", "ANS",
    "undef".

    ANR and ANS designate "chiralities" of non-chiral atoms that are
    important for determining the structure of the molecule (ex: cis/trans
    rings).

    :type structure: `Structure`
    :param structure: Chirality of atoms within this structure will be
            determined.

    :rtype: dict
    :return: Dictionary of chiralities keyed off atom index.
    """

    stereo = mm.mmstereo_new(structure)

    result = _get_chiralities(stereo)

    mm.mmstereo_delete(stereo)

    return result


[docs]def get_chiralities(structure):
    """
    Return a dictionary of chiral atoms, for which the key is the atom index
    and the value is a tuple of one of the following strings: "R", "S", "ANR",
    "ANS", "undef" and the list of CIP ranked neighbors.

    ANR and ANS designate "chiralities" of non-chiral atoms that are
    important for determining the structure of the molecule (ex: cis/trans
    rings).

    :type structure: `Structure`
    :param structure: Chirality of atoms within this structure will be
            determined.

    :rtype: dict
    :return: Dictionary of chiralities keyed off atom index.
    """

    stereo = mm.mmstereo_new(structure)

    result = _get_chiralities(stereo)

    for at in result.keys():
        ch = result[at]
        bonded_list = mm.mmstereo_get_bonded_mmlist(stereo, at)
        bonded = mmlist._mmlist_to_pylist(bonded_list)
        result[at] = (ch, tuple(bonded))

    mm.mmstereo_delete(stereo)

    return result


[docs]def evaluate_smarts(structure,
                    smarts_expression,
                    verbose=False,
                    first_match_only=False,
                    unique_sets=False):
    """:deprecated: Use `schrodinger.adapter.evaluate_smarts` instead."""

    msg = ("analyze.evaluate_smarts() is deprecated. Use "
           "schrodinger.adapter.evaluate_smarts() instead.")
    warnings.warn(msg, DeprecationWarning, stacklevel=2)

    return_list = []

    smarts_expression = smarts_expression.strip()

    if len(smarts_expression) == 0:
        return return_list

    # Explicitly compile the pattern because it makes it easier to catch
    # pattern errors.
    try:
        p = mm.mmpatty_new(smarts_expression)
    except mm.MmException as e:
        if e.rc == mm.MMPATTY_INVALID_PATTERN:
            msg = 'Invalid smarts expression: "%s"' % smarts_expression
            raise ValueError(msg)

    mm.mmpatty_start_ct(structure, verbose)
    matches = mm.mmpatty_match_comp(p, first_match_only, verbose)
    for match in matches:
        return_list.append(mmlist._mmlist_to_pylist(match))
        mm.mmlist_delete(match)
    mm.mmpatty_finished_ct()
    mm.mmpatty_delete(p)

    if unique_sets:
        filtered_list = []
        unique = set()
        for item in return_list:
            key = tuple(sorted(item))
            if key not in unique:
                filtered_list.append(item)
                unique.add(key)
        return_list = filtered_list

    return return_list


[docs]def validate_smarts(smarts):
    """:deprecated: Use `schrodinger.adapter.validate_smarts` instead."""

    msg = ("analyze.validate_smarts() is deprecated. Use "
           "schrodinger.adapter.validate_smarts() instead.")
    warnings.warn(msg, DeprecationWarning, stacklevel=2)

    try:
        p = mm.mmpatty_new(smarts)
        return [True, ""]
    except mm.MmException as e:
        msg = 'Invalid smarts expression: "%s"' % smarts
        return [False, msg]


[docs]def count_atoms_in_smarts(smarts):
    """
    Return the number of atoms that the given SMARTS pattern has.

    :param smarts: SMARTS pattern
    :type smarts: str

    :return: Number of atoms in the pattern
    :rtype: int

    :raises: ValueError is the pattern is invalid.
    """

    # Supress errors so that they don't print to stdout:
    error_handle = mm.mmpatty_get_error_handler()
    with mmerr.Level(mm.MMERR_OFF, error_handle):
        try:
            patty = mm.mmpatty_new(smarts)
        except mm.MmException as e:
            if e.rc == mm.MMPATTY_INVALID_PATTERN:
                raise ValueError(f'Invalid SMARTS pattern: "{smarts}"')
            else:
                raise
        else:
            # Get the number of atoms of the pattern:
            natoms = mm.mmpatty_num_atoms(patty)
            mm.mmpatty_delete(patty)
            return natoms


[docs]def lazy_canvas_import():
    """
    Initialize _canvas and smiles variables.  Since the canvas modules may
    take some time to import, before it an function loading time.
    """
    global _canvas_license, _canvas, smiles
    if _canvas_license is None:
        if _canvas is None:
            import schrodinger.application.canvas.base
            _canvas = schrodinger.application.canvas.base
        import schrodinger.application.canvas.utils
        _canvas_license = schrodinger.application.canvas.utils.get_license(
            schrodinger.application.canvas.utils.LICENSE_SHARED)
    if smiles is None:
        import schrodinger.structutils.smiles as smiles


[docs]def validate_smarts_canvas(smarts):
    """:deprecated: Use `schrodinger.adapter.validate_smarts` instead."""

    lazy_canvas_import()
    return _canvas.ChmQuery.validateSMARTS(smarts)


[docs]def evaluate_smarts_canvas(structure,
                           smarts,
                           stereo=STEREO_FROM_ANNOTATION_AND_GEOM,
                           start_index=1,
                           uniqueFilter=True,
                           allowRelativeStereo=False,
                           rigorousValidationOfSource=False,
                           hydrogensInterchangeable=False,
                           multiple_smarts=False):
    """:deprecated: Use `schrodinger.adapter.evaluate_smarts` instead."""

    lazy_canvas_import()

    if isinstance(structure, _canvas.ChmMol):
        mol = structure
    else:
        mol = create_chmmol_from_structure(structure, stereo=stereo)

    smarts = smarts if multiple_smarts else [smarts]

    ret = []
    for pattern in smarts:
        pattern = pattern.strip()
        q = _canvas.ChmQuery(pattern, True)
        m = _canvas.ChmQueryMatcher(uniqueFilter, allowRelativeStereo,
                                    rigorousValidationOfSource,
                                    hydrogensInterchangeable)

        matches = []
        for match in m.match(q, mol):
            matches.append([
                a.getMolIndex() + start_index for a in match.getMatchedAtoms()
            ])

        ret.append(matches)

    return ret if multiple_smarts else ret[0]


[docs]def evaluate_smarts_by_molecule(structure,
                                smarts,
                                timing_data=None,
                                canvas=True,
                                use_rdkit=False,
                                matches_by_mol=False,
                                molecule_numbers=None,
                                **kwargs):
    """
    Takes a structure and a SMARTS pattern and returns a list of all
    matching atom indices, where each element in the list is a group
    of atoms that match the the SMARTS pattern.  The advantage of this
    function over evaluate_smarts_canvas is that it does a SMARTS match
    for each molecule in a structure rather than over the entire
    structure at once.  SMARTS evaluation scales as N^2 with the size
    of the structure searched.  Doing many SMARTS evaluations over small
    molecules will have a significant speedup over one SMARTS evaluation
    over a composite structure.  The return value of this function is
    identical to the return value of the evaluate_smarts_canvas function (or
    evaluate_smarts function if canvas=False)
    with the possible exception of the order of the matches.  Do not use
    this function if the SMARTS match can span molecules.  This simply
    fails to match invalid SMARTS patterns and also discards any empty
    matches.

    Additional keyword arguments are passed to the SMARTS matching function

    :type structure: structure.Structure
    :param structure: the structure to search

    :type smarts: str
    :param smarts: the SMARTS pattern to match

    :type timing_data: dict or None
    :param timing_data: If supplied this dict will be filled with timing data
        for the SMARTS finding. Data will be recorded for each molecule searched.
        Keys will be the number of atoms in a molecule, each value will be a list.
        Each item in the list will be the time in seconds it took to search a
        molecule with that many atoms.

    :type canvas: bool
    :param canvas: If True, use Canvas SMARTS matching, if False, use mmpatty

    :param bool use_rdkit: Whether to use RDKIT. Cannot be used together with
        canvas

    :type matches_by_mol: bool
    :param matches_by_mol: if True then rather than returning a list of matches
        return a dictionary of matches key-ed by molecule number

    :type molecule_numbers: set
    :param molecule_numbers: set of molecule numbers in the structure to be used
        instead of the entire structure

    :rtype: list or dict
    :return: For the list (if matches_by_mol is False) each value is a list of
        atom indices matching the SMARTS pattern, for the dict (if matches_by_mol
        is True) keys are molecule indices and values are lists of matches for that
        molecule
    """

    assert not (canvas and use_rdkit), ('"canvas" and "use_rdkit" cannot be '
                                        'both requested.')

    if structure is None:
        return []

    matches = {}

    # SMARTs patterns are intra-molecular
    # so we iterate over molecules in struct
    if molecule_numbers is None:
        molecules = structure.molecule
    else:
        # Using enumerate as structure.molecule[x] is very slow #SHARED-6204
        molecules = [
            mol for mol in structure.molecule if mol.number in molecule_numbers
        ]
    for mol in molecules:
        new_to_old = \
            dict(list(zip(list(range(1, len(mol.atom) + 1)), mol.getAtomIndices())))
        try:
            if timing_data is not None:
                start = time.time()

            if canvas:
                match = evaluate_smarts_canvas(mol.extractStructure(), smarts,
                                               **kwargs)
            elif use_rdkit:
                uniquify = kwargs.get('unique_sets', True)
                match = adapter.evaluate_smarts(mol.extractStructure(), smarts,
                                                uniquify, RDKIT_MAX_MATCHES)
            else:
                match = evaluate_smarts(mol.extractStructure(), smarts,
                                        **kwargs)
            if timing_data is not None:
                end = time.time()
                timing_data.setdefault(len(mol.atom), []).append(end - start)

            # Remove empty matches and use structure atom indexes
            # rather than by-molecule atom indexes
            for a_match in match:
                if a_match:
                    b_match = [new_to_old[x] for x in a_match]
                    matches.setdefault(mol.number, []).append(b_match)

        except mm.MmException:
            pass

    if not matches_by_mol:
        matches = [
            match for mol_match in list(matches.values()) for match in mol_match
        ]

    return matches


[docs]def evaluate_multiple_smarts(structure,
                             smarts_list,
                             verbose=False,
                             first_match_only=False,
                             unique_sets=False,
                             keep_nested=False):
    """
    Search for multiple SMARTS substructures in `Structure` `structure`.

    Return a list of lists of ints. Each list of ints is a list of atom
    indices matching a SMARTS pattern. The multiple SMARTS patterns are
    combined into one list.

    :type structure: `Structure`
    :param structure: Structure to search for matching substructures.

    :type smarts_list: list
    :param smarts_list: List of SMARTS patterns to look for.

    :type verbose: bool
    :param verbose:
            If True, print additional progress reports from the C
            implementation.

    :type first_match_only: bool
    :param first_match_only:
            If False, return all matches for a given starting atom - e.g. [1, 2,
            3, 4, 5, 6] and [1, 6, 5, 4, 3, 2] from atom 1 of benzene with the
            smarts_expression 'c1ccccc1'. If True, return only the first match
            found for a given starting atom. Note that setting first_match_only
            to True does not affect matches with different starting atoms - i.e.
            benzene will still return six lists of ints for 'c1ccccc1', one for
            each starting atom. To match only once per set of atoms, use
            unique_sets=True. Note also that setting first_match_only to True
            does not guarantee that all matching atoms will be found.

    :type unique_sets: bool
    :param unique_sets:
            If True, the returned list of matches will contain a single
            (arbitrary) match for any given set of atoms. If False, return the
            uniquely ordered matches, subject to the behavior specified by the
            first_match_only parameter.

    :rtype: list
    :return: Each value is a list of atom indices matching the SMARTS patterns.
    """

    if len(smarts_list) == 0:
        return []

    return_list = [[] for idx in range(len(smarts_list))]

    mm.mmpatty_start_ct(structure, verbose)

    # Iterate over smarts patterns and get matches for each:
    for idx, smarts_expression in enumerate(smarts_list):
        if len(smarts_expression) == 0:
            continue

        try:
            p = mm.mmpatty_new(smarts_expression)
        except mm.MmException as e:
            if e.rc == mm.MMPATTY_INVALID_PATTERN:
                msg = 'Invalid smarts expression: "%s"' % smarts_expression
                raise ValueError(msg)

        matches = mm.mmpatty_match_comp(p, first_match_only, verbose)

        for match in matches:
            # List of atoms matching the smarts_expression:
            match_list = mmlist._mmlist_to_pylist(match)
            return_list[idx].append(match_list)
            mm.mmlist_delete(match)
        mm.mmpatty_delete(p)

    mm.mmpatty_finished_ct()

    if unique_sets:
        for idx in range(len(return_list)):
            matches = return_list[idx]
            filtered_list = []
            unique = set()
            for item in return_list[idx]:
                key = tuple(sorted(item))
                if key not in unique:
                    filtered_list.append(item)
                    unique.add(key)
            return_list[idx] = filtered_list

    if not keep_nested:
        ret = [match for matches in return_list for match in matches]
        return ret

    return return_list


[docs]def evaluate_substructure(st, subs_expression, first_match_only=False):
    """
    Search for the MacroModel-style substructure expression in `Structure`
    `st` and return the atoms that match. For each match a list of atom
    indices is returned.

    :type st: `Structure`
    :param st: Structure to search for matching substructures.

    :type subs_expression: str
    :param subs_expression: MacroModel-style substructure expression to use
            for the search.

    :type first_match_only: bool
    :param first_match_only:
            If True, return only the first match found.

    :rtype: list
    :return: List of atom indices match the substructure expression.
    """

    return_list = []

    # I'm compiling the pattern here because it makes it easier to catch
    # pattern errors.
    p = mm.mmsubs_new(subs_expression)
    matches = mm.mmsubs_match_comp(p, st, first_match_only)
    for match in matches:
        return_list.append(mmlist._mmlist_to_pylist(match))
        mm.mmlist_delete(match)
    mm.mmsubs_delete(p)

    return return_list


[docs]def generate_asl(st, atom_list):
    """
    Generate and return an atom expression for the atoms in `Structure`
    `st` which are listed in `atom_list`. The ASL expression will be as
    compact as possible using mol, res and atom expressions where
    appropriate.

    :type st: `Structure`
    :param st: Structure holding the ASL atoms.

    :type atom_list: list
    :param atom_list: List of indices of atoms for which ASL is desired.

    :rtype: str
    :return: ASL compactly describing the atoms in atom_list.
    """

    # Start by creating a bitset from the atom list
    bs = Bitset.from_list(st.atom_total, atom_list)

    return mm.mmasl_generate_asl(bs, int(st))


[docs]def generate_residue_asl(residues):
    """
    Create an ASL representing the residues.

    Inscode will only be included if at least one of the residues has a
    non-blank inscode.

    :param residues: Residue objects to create ASL
    :type residues: collections.abc.Iterable(structure._Residue)

    :rtype: str
    """
    # Note:
    # ASLs generated by this function are faster to evaluate than
    # `" OR ".join(res.getAsl() for res in residues)`
    # by ~30% on average and an order of magnitude with no inscodes
    rescodes_by_chain = collections.defaultdict(set)
    found_inscode = False
    for res in residues:
        inscode = res.inscode
        chain = res.chain
        if not chain.strip():
            chain = f'"{chain}"'
        if inscode.strip():
            found_inscode = True
        else:
            inscode = f'"{inscode}"'
        rescodes_by_chain[chain].add((res.resnum, inscode))

    if found_inscode:

        def create_asl_for_chain(rescodes):
            per_res_asls = (
                f'(res.num {num} AND res.i {ins})' for num, ins in rescodes)
            residues_str = " OR ".join(per_res_asls)
            return f'({residues_str})'
    else:

        def create_asl_for_chain(rescodes):
            number_str = ",".join(str(resnum) for resnum, _ in rescodes)
            return f'res.num {number_str}'

    chain_parts = {
        chain: create_asl_for_chain(sorted(rescodes))
        for chain, rescodes in rescodes_by_chain.items()
    }

    asl = " OR ".join(f'(chain. {chain} AND {residues})'
                      for chain, residues in sorted(chain_parts.items()))
    if not found_inscode:
        asl = f'({asl}) AND res.i " "'
    return asl


[docs]def validate_asl(asl):
    """
    Validate the given ASL expression.  This is useful for validating an ASL
    when a structure object is not available - for example when validating
    a command line option.
    NOTE: A warning is also printed to stdout if the ASL is not valid.

    :param asl: ASL expression.
    :type asl: str

    :return: True if ASL is valid, False otherwise.
    :rtype: bool
    """
    try:
        bs = Bitset(size=0)
        st = create_new_structure()
        mm.mmasl_parse_input(asl, bs, st, 0)
        return True
    except mm.MmException:
        return False


[docs]def evaluate_asl(st, asl_expr):
    """
    Search for substructures matching the ASL (Atom Specification Language)
    string `asl_expr` in `Structure` `st`.

    :type st: `Structure`
    :param st: Structure to search for matching substructures.

    :type asl_expr: str
    :param asl_expr: ASL search string.

    :rtype: list
    :return: List containing indices of matching atoms.

    :raise schrodinger.infra.mm.MmException:
            If the ASL expression is invalid.
    """
    # Get the number of atoms in this CT
    num_atoms = mm.mmct_ct_get_atom_total(st)
    # Create a bitset object of that size:
    bs = Bitset(size=num_atoms)
    # Parse the input string using mmasl:
    mm.mmasl_parse_input(asl_expr, bs, st, 1)
    return list(bs)


[docs]def get_atoms_from_asl(st, asl_expr):
    """
    Return atoms matching the ASL string `asl_expr` in `Structure` `st`.

    :type st: `Structure`
    :param st: Structure to search for matching substructures.

    :type asl_expr: str
    :param asl_expr: ASL search string.

    :rtype: generator
    :return: Generator of matching `StructureAtom` objects.

    :raise schrodinger.infra.mm.MmException:
            If the ASL expression is invalid.

    """
    return (st.atom[index] for index in evaluate_asl(st, asl_expr))


[docs]def hydrogens_present(st):
    """
    Return True if all hydrogens are present in `Structure` `st`, False
    otherwise.

    Since all modern force fields require hydrogens, this is a good check to
    make sure that a structure is ready for force field calculations. This
    function is implemented by checking to see if the structure can be used
    as-is in a calculation with OPLS2003.

    :warning: Requires atom types to be correct. Consider calling {Structure.retype} first.

    :type st: `Structure`
    :param st: Structure to be tested.

    :rtype: bool
    :return: Are all hydrogens are present?
    """

    # What the mmhtreat function does is check whether the structure will
    # run with the specified forcefield. Calling the function with OPLS2003
    # (which requires all atoms have hydrogens) is effectively a check that
    # all atoms have hydrogens. Since all modern force fields require
    # hydrogens, it is a reasonable check that a structure is ready for any
    # backend job.

    (ok, bs) = mm.mmhtreat_checkff("OPLS2003", st)
    if mm.mmbs_in_use(bs):
        mm.mmbs_delete(bs)

    return bool(ok)


[docs]def has_valid_lewis_structure(st: Structure) -> bool:
    """
    Check whether a valid Lewis structure for the structure is possible.
    Possible causes of an invalid Lewis structure may be invalid bond orders,
    charges, or missing hydrogens or other atoms.

    This check may be useful before attempting to run any backend calculations
    on the given structure.
    """
    mm.mmerr_initialize()
    mm.mmlewis_initialize(mm.MMERR_OFF)
    try:
        # with MMERR_OFF we may still get output to the terminal, so silence I/O
        with ioredirect.IOSilence():
            mm.mmlewis_apply(st)
        return True
    except mm.MmException:
        valid = False
    finally:
        mm.mmlewis_terminate()
        mm.mmerr_terminate()
    return valid


[docs]def generate_tautomer_code(st,
                           considerEZStereo=True,
                           considerRSStereo=True,
                           stereo=STEREO_FROM_ANNOTATION_AND_GEOM,
                           strip=False):

    lazy_canvas_import()

    mol = None
    if isinstance(st, _canvas.ChmMol):
        mol = st
    else:
        mol = create_chmmol_from_structure(st, stereo=stereo)

    if strip:
        mol = _canvas.ChmPathTools.strip(mol)

    tautomerCode = mol.getTautomerCode64(considerEZStereo, considerRSStereo)
    mol = None
    return str(tautomerCode)


[docs]def create_chmmol_from_structure(structure,
                                 stereo=STEREO_FROM_ANNOTATION_AND_GEOM):
    """
    Creates a ChmMol object for a given structure and returns the ChmMol.

    :param schrodinger.structure.Structure structure: The structure to create
        ChmMol from.
    :param enum stereo:
            Specify how to determine the stereochemistry of a ChmMol from a
            Structure. Can be STEREO_FROM_GEOMETRY, STEREO_FROM_ANNOTATION,
            STEREO_FROM_ANNOTATION_AND_GEOM, or NO_STEREO. See
            `schrodinger.structutils.smiles.SmilesGenerator.__init__` for
            descriptions of these options.
    :return schrodinger.application.canvas.base.ChmMol: The created ChmMol.
    """

    lazy_canvas_import()
    adaptor = _canvas.ChmMmctAdaptor()
    mol = adaptor.create(structure.handle,
                         smiles._translate_stereo_enum(stereo))
    return mol


[docs]def generate_smiles(st, unique=True, stereo=STEREO_FROM_ANNOTATION_AND_GEOM):
    """:deprecated: Use `schrodinger.adapter.to_smiles` instead."""

    global smiles
    if smiles is None:
        import schrodinger.structutils.smiles as smiles

    smiles_generator = smiles.SmilesGenerator(stereo, unique=unique)
    pattern = smiles_generator.getSmiles(st)

    return pattern


[docs]def generate_smarts(st, atom_subset=None, check_connectivity=True):
    """:deprecated: Use `schrodinger.adapter.to_smarts` instead."""

    msg = ("analyze.generate_smarts() is deprecated. Use "
           "schrodinger.adapter.to_smarts() instead.")
    warnings.warn(msg, DeprecationWarning, stacklevel=2)

    bs = Bitset(size=len(st.atom))

    if atom_subset:
        for at in atom_subset:
            bs.set(at)
    else:
        bs.fill()

    # Check for connectivity (EV:106255)
    if check_connectivity:
        if atom_subset:
            new_ct = mm.mmct_ct_extract_atoms(st.handle, bs)
            new_st = Structure(new_ct)
            fragments = new_st.mol_total
        else:
            fragments = st.mol_total

        if not fragments:
            raise ValueError('No atoms found for SMARTS generation.')
        elif fragments > 1:
            raise ValueError('SMARTS generation only works on connected sets '
                             'of atoms.')

    mmsmiles_handle = mm.mmsmiles_new()
    pattern = ''
    pattern = mm.mmsmiles_get_smarts(mmsmiles_handle, st.handle, bs)
    mm.mmsmiles_delete(mmsmiles_handle)

    return pattern


[docs]def generate_smarts_canvas(st,
                           atom_subset=None,
                           check_connectivity=True,
                           include_hydrogens=False,
                           honor_maestro_prefs=False,
                           include_stereo=False):
    """:deprecated: Use `schrodinger.adapter.to_smarts` instead."""

    lazy_canvas_import()

    maestro = get_maestro()

    # Check for connectivity (EV:106255)
    if check_connectivity:
        if atom_subset is not None:
            fragments = st.extract(atom_subset).mol_total
        else:
            fragments = st.mol_total
        if not fragments:
            raise ValueError('No atoms found for SMARTS generation.')
        elif fragments > 1:
            raise ValueError('SMARTS generation only works on connected sets '
                             'of atoms.')

    if include_hydrogens:
        wantH = _canvas.optionSMARTS.H_Always
    else:
        wantH = _canvas.optionSMARTS.H_Default

    # Prepare bitset of atoms to generate SMARTS pattern for:
    bs = Bitset(size=st.atom_total)
    if atom_subset is not None:
        for at in atom_subset:
            bs.set(at)
            if st.atom[at].atomic_number == 1:
                wantH = 1
    else:
        pos = 1
        for atom in st.atom:
            if include_hydrogens or atom.atomic_number != 1:
                bs.set(pos)
            pos = pos + 1

    # Generate smarts pattern.
    smart_generator = _canvas.ChmMmctSmilesGenerator()
    markRings = False

    wantMapping = False
    useAtomicNumberOnAcyclicAtoms = False

    if honor_maestro_prefs:
        if not maestro:
            raise ValueError("honor_maestro_prefs option can be used only when "
                             "running inside Maestro")

        def _get_bool_pref(option):
            return (maestro.get_command_option("prefer", option) == "True")

        wantCharge = _get_bool_pref("smartscharge")
        wantSubstituents = _get_bool_pref("smartssubstituents")
        include_stereo = _get_bool_pref("smartsstereochemistry")
    else:
        wantCharge = True
        wantSubstituents = False

    if wantSubstituents:
        wantX = _canvas.optionSMARTS.X_Always
    else:
        wantX = _canvas.optionSMARTS.X_No

    return smart_generator.getSubsetSMARTS(st.handle, bs, markRings, wantH,
                                           wantX, include_stereo, wantMapping,
                                           None, useAtomicNumberOnAcyclicAtoms,
                                           wantCharge)


[docs]def can_atom_hydrogen_bond(atom):
    """
    Returns True if the given atom can be involved in a hydrogen bond.

    :param atom: Atom in question
    :type atom: `structure._StructureAtom`

    :return: Whether atom can H-bond
    :rtype: bool
    """
    if atom.element == 'H':
        if len(atom.bond) != 1:
            return False
        neighbor = next(atom.bonded_atoms)
        return bool(mm.mmct_hbond_is_donor(atom.structure, neighbor))
    else:
        return bool(mm.mmct_hbond_is_acceptor(atom.structure, atom))


[docs]def generate_crystal_mates(st,
                           radius=10.0,
                           space_group=None,
                           a=None,
                           b=None,
                           c=None,
                           alpha=None,
                           beta=None,
                           gamma=None,
                           group_radius=14.0):
    """
    Generate crystal mates for the input `Structure` `st`.

    Return a list of structures that represent the crystal mates. (Note that
    the first item in the list represents the identity transformation and as
    such will be identical to the input structure.)

    All crystal mates within `radius` of the input structure are generated.

    The crystal parameters can be specified as parameters to this function
    or can be standard PDB properties of the input structure. If the
    structure was read from a PDB file then these crystal properties will
    usually be present.

    The group_radius is used in the crystal mates calculation to determine
    whether a symmetric element is in contact with the ASU. There should be
    little reason to change the default value of 14.0.

    :type st: `Structure`
    :param st: Structure for which crystal mates will be generated.

    :type radius: float
    :param radius: Distance within which to generate crystal mates.

    :type space_group: str
    :param space_group: Space group of the crystal.  If `None`, uses
            `st`'s s_pdb_PDB_CRYST1_Space_Group.
    :type a: float
    :param a: Crystal 'a' length.  If `None`, uses `st`'s s_pdb_PDB_CRYST1_a.
    :type b: float
    :param b: Crystal 'b' length.  If `None`, uses `st`'s s_pdb_PDB_CRYST1_b.
    :type c: float
    :param c: Crystal 'c' length.  If `None`, uses `st`'s s_pdb_PDB_CRYST1_c.
    :type alpha: float
    :param alpha: Crystal 'alpha' angle.  If `None`, uses `st`'s
            s_pdb_PDB_CRYST1_alpha.
    :type beta: float
    :param beta: Crystal 'beta' angle.  If `None`, uses `st`'s
            s_pdb_PDB_CRYST1_beta.
    :type gamma: float
    :param gamma: Crystal 'gamma' angle.  If `None`, uses `st`'s
            s_pdb_PDB_CRYST1_gamma.
    :type group_radius: float
    :param group_radius: Used to determine whether a symmetric element is in
            contact with the ASU. There should be little reason to change the
            default value of 14.0.

    :rtype: list
    :return: list of `Structure objects<Structure>` that represent the
            crystal mates. (Note that the first item in the list represents the
            identity transformation and as such will be identical to the input
            structure.)
    """

    handle = -1

    absent = ""
    try:
        if not space_group:
            absent = "Space Group"
            space_group = st.property['s_pdb_PDB_CRYST1_Space_Group']
        if not a:
            absent = "a"
            a = st.property['r_pdb_PDB_CRYST1_a']
        if not b:
            absent = "b"
            b = st.property['r_pdb_PDB_CRYST1_b']
        if not c:
            absent = "c"
            c = st.property['r_pdb_PDB_CRYST1_c']
        if not alpha:
            absent = "alpha"
            alpha = st.property['r_pdb_PDB_CRYST1_alpha']
        if not beta:
            absent = "beta"
            beta = st.property['r_pdb_PDB_CRYST1_beta']
        if not gamma:
            absent = "gamma"
            gamma = st.property['r_pdb_PDB_CRYST1_gamma']
    except:
        raise Exception("Some crystal properties are not specified: %s" %
                        absent)

    handle = mm.mmcrystal_new()

    mm.mmcrystal_set_symmetry(handle, space_group, a, b, c, alpha, beta, gamma)

    mm.mmcrystal_set_group_radius(handle, group_radius)

    mates = mm.mmcrystal_generate_mates(handle, st, radius)

    ret_list = []
    for m in mates:
        ret_list.append(Structure(m))

    mm.mmcrystal_delete(handle)

    return ret_list


[docs]def find_overlapping_atoms(st,
                           ignore_hydrogens=False,
                           ignore_waters=False,
                           dist_threshold=0.8):
    """
    Search the specified structure for overlapping atoms.  Returns a list of
    (atom1index, atom2index) tuples.

    :type st: `Structure`
    :param st: Structure to search for overlapping atoms
    :param ignore_hydrogens:
            Whether to ignore hydrogens.
    :param ignore_waters:
            Whether to ignore waters.
    :param dist_threshold:
            Atoms are considered overlapping if their centers are within this
            distance of each other.

    :rtype: list
    :return: Each value is a tuple containing the indices of overlapping atoms.
    """

    asl = "all"
    if ignore_waters:
        asl += " AND (NOT water)"
    if ignore_hydrogens:
        asl += " AND (NOT atom.ele H)"

    measure = schrodinger.structutils.measure
    if asl == "all":
        close_atoms = measure.get_close_atoms(st, dist_threshold)
    else:
        matched_atoms = evaluate_asl(st, asl)
        close_atoms = measure.get_close_atoms(st, dist_threshold, matched_atoms)

    return close_atoms


[docs]def generate_molecular_formula(st):
    """
    Return a string for the molecular formula in Hill notation for the `st`.
    The structure must contain only one molecule.

    :type st: `Structure`
    :param st: Find the molecular formula for this structure.  Must contain only
            one molecule.

    :rtype: str
    :return: The molecular formula for `st`.
    """

    def get_formula_substr(element_str, element_count):
        """
        :type element_str: str
        :param element_str: The element name
        :type element_count: int
        :param element_count: Atom count for the element.
        :rtype: str
        :return: String for a single element with name and number.
        """

        formula_substring = ""
        if element_count == 0:
            formula_substring = ""
        elif element_count == 1:
            formula_substring = element_str
        else:
            formula_substring = "".join([element_str, repr(element_count)])

        return formula_substring

    # add atomic symbol counts to dict
    formula_dict = {}  # key: element, value: number of atoms
    for atom in st.atom:
        element = atom.element
        try:
            formula_dict[element] += 1
        except KeyError:
            formula_dict[element] = 1

    formula_list = []

    # Parse the dict to get formula, C and H are special
    if 'C' in formula_dict:
        formula_list.append(get_formula_substr('C', formula_dict['C']))
    if 'H' in formula_dict:
        formula_list.append(get_formula_substr('H', formula_dict['H']))

    # parse the rest of the dict keys to build the rest of the formula
    for element in sorted(list(formula_dict)):
        if element not in ['C', 'H']:
            formula_list.append(
                get_formula_substr(element, formula_dict[element]))

    return "".join(formula_list)


def _may_atom_be_part_of_rotatable_bond(atom, allow_methyl=False):
    """
    Determine whether an atom might be in a rotatable bond.

    :type atom: `_StructureAtom`
    :param atom: Atom to test for presence of rotatable bond.

    :type allow_methyl: bool
    :param allow_methyl: allow -CH3 and -NH3 as rotatable
        bonds, if True

    :rtype: bool
    :return: Might the atom be in a rotatable bond?
    """

    # Internal function

    # if atom is hydrogen, bond will not be rotatable
    if atom.atomic_number == 1:
        return False

    # If has one connection it is a terminal atom and non-rotatable
    if atom.bond_total <= 1:
        return False

    # Check if all attachments but one to atom are H
    atom_numH = 0
    triple_present = False
    for bond in atom.bond:
        if bond.atom2.atomic_number == 1:
            atom_numH += 1
        if bond.order == 3:
            triple_present = True
            break

    # Bond adjacent to triple bonds are not counted as rotatable
    if triple_present:
        return False

    # if have three H and atoma is a carbon or nitrogen, not rotatable
    if not allow_methyl and atom_numH == 3 and atom.atomic_number in [6, 7]:
        return False

    return True


def _is_bond_ring_edge(bond, rings):
    """
    Determine whether a bond is a ring edge.
    :type bond: `_StructureBond`
    :param bond: Bond to test for presence at ring edges.
    :type rings: list
    :param rings: list of ring atom index lists.
    :rtype: bool
    :return: Is the bond a ring edge?
    """
    # Internal function. Returns True if specified bond is a ring edge.

    atoma = bond.atom1
    atomb = bond.atom2

    # Get rid of all ring bonds except those between atoms on different rings, i.e. C6H11-C6H11
    # Figure out if atoma-atomb bond is a ring edge:
    is_ring_edge = False
    for ringatoms in rings:
        ring = _Ring(bond._ct, 0, ringatoms, None)
        for edge in ring.edge:
            if (edge.atom1 == atoma) and (edge.atom2 == atomb):
                is_ring_edge = True
                break
            elif (edge.atom1 == atomb) and (edge.atom2 == atoma):
                is_ring_edge = True
                break
        if is_ring_edge:
            break
    if is_ring_edge:
        return True

    return False


[docs]def is_bond_rotatable(bond, rings=None, allow_methyl=False):
    """
    Return True if specified bond is rotatable, False otherwise.

    A bond is considered rotatable if all of the following are true...

      1. It is a single bond.
      2. It is not adjacent to a triple bond.
      3. It is not in a ring.
      4. Neither atom is a hydrogen or other terminal atom.
      5. Neither atom is a carbon or nitrogen with three hydrogens attached.

    :type bond: `structure.StructureBond`
    :param bond: bond to test for rotatability.

    :type rings: list
    :param rings:
            List of ring atom index lists.  As an optimization, provide the (sorted)
            rings list from the find_rings function if you already have it.
            Otherwise, an SSSR calculation will be done.

    :type allow_methyl: bool
    :param allow_methyl: allow -CH3 and -NH3 as rotatable
        bonds, if True

    :rtype: bool
    :return: Is the bond rotatable?
    """

    # This function uses the logic derived from Impact's rotbond routine

    # Only 1-order bonds are rotatable:
    if bond.order != 1:
        return False

    if not _may_atom_be_part_of_rotatable_bond(bond.atom1,
                                               allow_methyl=allow_methyl):
        return False

    if not _may_atom_be_part_of_rotatable_bond(bond.atom2,
                                               allow_methyl=allow_methyl):
        return False

    # If got this far, then bond may be a rotatable bond (if not in ring)

    # If rings are not specified, determine the rings and sort by connection:
    if rings is None:
        rings = bond._ct.find_rings(sort=True)

    if _is_bond_ring_edge(bond, rings):
        return False

    return True


[docs]def rotatable_bonds_iterator(st, rings=None, max_size=None):
    """
    Return an iterator for rotatable bonds (atomnum1, atomnum2) in the
    structure.

    See the `is_bond_rotatable` function description for which bonds are
    considered rotatable.

    :type st: `Structure`
    :param st: The structure to search for rotatable bonds.

    :type rings: list
    :param rings:
            List of ring atom index lists. As an optimization, provide the (sorted)
            rings list from the find_rings function if you already have it.
            Otherwise, an SSSR calculation will be done.

    :type max_size: int
    :param max_size:
            If specified, yield only rings that have up to this number of bonds.
            Use this option to exclude large rings; e.g. those in macrocycle-like
            molecules.

    :rtype: iterator of tuples
    :return: yields tuples of atom index pairs describing a rotatable bond in `st`.
    """

    # This function uses logic derived from Impact's rotbond routine.

    # This is Python version of get_num_rotors from ligparse_utils.c
    # > must explicitly exit to not count rotors, each bond is
    # > considered rotatable as the default

    # If rings are not specified, determine the rings and sort by connection:
    if rings is None:
        rings = st.find_rings(sort=True)
    if max_size is not None:
        rings = [r for r in rings if len(r) <= max_size]

    # For each atom in the structure:
    for atoma in st.atom:
        if not _may_atom_be_part_of_rotatable_bond(atoma):
            continue

        # Now loop over all attachments to atoma:
        for bond in atoma.bond:
            atomb = bond.atom2

            # only want to check 1/2 the bond matrix
            if int(atomb) < int(atoma):
                continue

            # if bond between atomba and atomb is double or triple bond, not rotatable
            if bond.order != 1:
                continue

            if not _may_atom_be_part_of_rotatable_bond(atomb):
                continue

            # If got this far, then atoma-atomb bond may be a rotatable bond (if not in ring)
            if _is_bond_ring_edge(bond, rings):
                continue  # not rotatable

            # If control makes it this far, it's a rotatable bond:
            yield (atoma.index, atomb.index)

    # Will raise StopIteration when gets here.


[docs]def get_num_rotatable_bonds(st, rings=None, max_size=None):
    """
    Return the number of rotatable bonds in the Structure `st`.  The count
    does not include trivial rotors such as terminal methyls, or rotors
    within rings.

    :type st: `Structure`
    :param st: The structure to search for rotatable bonds.

    :type rings: list of lists of ints
    :param rings:
            List of ring atom index lists. As an optimization, provide the (sorted)
            rings list from the find_rings function if you already have it.
            Otherwise, an SSSR calculation will be done.

    :type max_size: int
    :param max_size:
            If specified, yield only rings that have up to this number of bonds.
            Use this option to exclude large rings; e.g. those in macrocycle-like
            molecules.

    :rtype: int
    :return: The number of rotatable bonds in `st`.
    """

    # Comments moved down from the docstring during the docstring review.

    # This function returns the number of rotatable bonds in the st
    # using logic derived from Impact's rotbond routine

    # This is Python version of get_num_rotors from ligparse_utils.c
    # > must explicitly exit to not count rotors, each bond is
    # > considered rotatable as the default

    return sum(1 for item in rotatable_bonds_iterator(
        st, rings=rings, max_size=max_size))


[docs]def hbond_iterator(st, atoms=None):
    """
    Iterate over hydrogen bond between the atoms specified by the `atom_set`
    and the other atoms in `st`. Each yielded item is a tuple of
    (atom-index-1, atom-index-2).

    NOTE: This function has been updated to simply act as a wrapper to
    `hbond.get_hydrogen_bonds` to ensure that hbonds are determined
    consistently.

    :type st: `Structure`
    :param st: The structure to search for H-bonds.

    :type atoms: list of int or None
    :param atoms:
            A list of atom indices (or _StructureAtom objects) to analyze.
            If not specified, then all H-bonds present in the structure are
            returned.

    :rtype: list of (`_StructureAtom`, `_StructureAtom`)
    :return: list of (donor atom object, acceptor atom object) for each
        hydrogen bond identified.
    """
    msg = ("analyze.hbond_iterator() is deprecated. Use "
           "schrodinger.structutils.interactions.hbond.get_"
           "hydrogen_bonds() instead.")
    warnings.warn(msg, DeprecationWarning, stacklevel=2)
    return hbond.get_hydrogen_bonds(st, atoms1=atoms)


[docs]def find_equivalent_atoms(st, span_molecules=True):
    """
    Find atoms in the structure that are equivalent. For example, all three
    hydrogens on a methyl group are equivalent.

    Returns a list, each value of which is a list of atoms that are
    equivalent.

    :type st: `Structure`
    :param st: The structure to search for equivalent atoms.

    :type span_molecules: bool
    :param span_molecules: If True, don't consider molecules to
        be separate entities. If False, constructs global equivalence
        classes for all atoms in a ct, but will never return an equivalence class
        across molecules.

    :rtype: list
    :return: Each value is a list of indices of equivalent atoms.
    """

    sites = mm.mmsym_find_equivalent_sites_mol(st, not span_molecules)

    atoms_by_site = {}
    for i, sitenum in enumerate(sites):
        try:
            atoms_by_site[sitenum].append(i + 1)
        except KeyError:
            atoms_by_site[sitenum] = [i + 1]

    # Remove atoms that have no equivalent to them:
    for key, value in list(atoms_by_site.items()):
        if len(value) == 1:
            del atoms_by_site[key]

    return list(atoms_by_site.values())


[docs]def get_approximate_sasa(st, atom_indexes=None, cutoff=7.0):
    """
    :deprecated: This function only returns a rough approximation to the solvent
            accessible surface area. Please use the `calculate_sasa` function
            instead.
    """

    msg = "analyze.get_approximate_sasa() is deprecated. Use calculate_sasa() instead."
    warnings.warn(msg, DeprecationWarning, stacklevel=2)

    if atom_indexes is None:
        atom_indexes = list(range(1, st.atom_total + 1))
    all_A_c = []
    for iat in atom_indexes:
        A_c = get_approximate_atomic_sasa(st, iat, cutoff)
        all_A_c.append(A_c)
    structure_sasa = sum(all_A_c)
    return structure_sasa


[docs]def get_approximate_atomic_sasa(st,
                                iat,
                                cutoff=7.0,
                                sasa_probe_radius=1.4,
                                hard_sphere_s=2.5,
                                scale_factor=2.32):
    """
    :deprecated: Deprecated in favor of `calculate_sasa`, which is more
            accurate.
    """

    msg = "analyze.get_approximate_atomic_sasa() is deprecated. Use calculate_sasa() instead."
    warnings.warn(msg, DeprecationWarning, stacklevel=2)

    # FIXME:  The scale_factor should not be required (e.g. = 1), and
    # represents a flaw in my implementation.  However, empirically I find
    # it gives OK results for both sucrose confs and lysozyme 1l47.pdb.
    # The scale_factor makes S larger when evaluating individual
    # probabilities.  Without the factor, the product many near unity
    # probabilties (e.g. 0.98), result in a A' surface area that is
    # too small.

    r1_rad = st.atom[iat].radius
    S = 4 * math.pi * math.pow((r1_rad + sasa_probe_radius), 2)  # atomic sasa
    all_prob = []
    all_b_prime = []

    # Loop over non-identity atoms, calculate b and b'.
    asl_expr = "not a.n %d and within %f a.n %d" % (iat, cutoff, iat)
    for jat in evaluate_asl(st, asl_expr):

        r2_rad = st.atom[jat].radius
        r1_r2_rad = r1_rad + r2_rad
        dist = st.measure(iat, jat)

        # b: maximal overlap Eq 3, the amount of iat's SA buried by jat.
        # if dist > (r1_r2_rad + 2*sasa_probe_radius)
        # then b should be zero.
        b = None
        if dist > (r1_r2_rad + 2 * sasa_probe_radius):
            b = 0
        else:
            b = (math.pi * (r1_rad + sasa_probe_radius) *
                 (r1_r2_rad + 2 * sasa_probe_radius - dist) *
                 (1 + ((r2_rad - r1_rad) / dist)))
        # A negative b is non-physical so convert to 0.0.
        if b < 0:
            b = 0.0

        # b': minimal overlap Eq 5, accounts for excluded volume counts.
        b_prime = (math.pi * (r1_rad + sasa_probe_radius) *
                   (r1_r2_rad + 2 * sasa_probe_radius - hard_sphere_s - dist) *
                   (1 + ((r2_rad - hard_sphere_s - r1_rad) / dist)))

        # A negative b_prime is non-physical so convert 0.0.
        if b_prime < 0:
            b_prime = 0.0

        all_b_prime.append(b_prime)

        prob_j = 1 - ((b - b_prime) / (S * scale_factor))
        all_prob.append(prob_j)

    # Loop over probabilities.
    product = 1
    for prob_j in all_prob:
        product *= prob_j

    B_prime = sum(all_b_prime)  # Eq 9.
    A_prime = S * product  # Eq 7.

    # Eq 6.
    if A_prime < B_prime:
        A_c = 0
    else:
        A_c = A_prime - B_prime

    logger.debug("%d(%s) %.2f, S %.2f, A' %.2f, B' %.2f, p(%d) %.5f" %
                 (iat, st.atom[iat].element, A_c, S, A_prime, B_prime,
                  len(all_prob), product))
    return A_c


[docs]def calculate_sasa_by_atom(st,
                           atoms=None,
                           cutoff=8.0,
                           probe_radius=1.4,
                           resolution=0.2,
                           exclude_water=False):
    """
    Calculate the solvent-accessible surface area (SASA) for the whole
    structure, or an atom subset, and returns a list of floats.

    :type st: `Structure`
    :param st: Structure for which SASA is desired.

    :type atoms: list
    :param atoms: List of atom indices or of
            `_StructureAtom objects<_StructureAtom>` for the atoms to count.  If
            None, calculates SASA for all atoms. (default: None)

    :type cutoff: float
    :param cutoff: Atoms within this distance of `atoms` will be considered for
            occlusion. Requires `atoms` to be specified. (default: 8.0A)

    :type probe_radius: float
    :param probe_radius: Probe radius, in units of angstroms, of the solvent
            molecule. (default: 1.4A)

    :type resolution: float
    :param resolution: Resolution to use. Decreasing this number will yield
            better results, increasing it will speed up the calculation. NOTE: This
            is NOT the same option as Maestro's surface resolution, which uses a
            different algorithm to calculate the surface area. (default: 0.2)

    :type exclude_water: bool
    :param exclude_water: If set to True then explicitly exclude waters in the
            method. This option is only works when 'atoms' argument is passed.
            (default: False)

    :rtype: list
    :return: A list of solvent accessible surface area of selected atoms in `st`.
    """

    # NOTE: the calculateSASA() function is defined here:
    # mmshare/canvaslibs_ext/mm/ChmMmctAdaptor.cpp
    #
    # The Phase library that is used by the above function is defined here:
    # mmshare/phaselibs/feature/PhpSASArea.cpp
    #

    lazy_canvas_import()

    if atoms is None and not exclude_water:
        # Calculate SASA for the whole structure

        sasa_list = _canvas.ChmMmctAdaptor.calculateSASAByAtom(
            st.handle, probe_radius, resolution, False)
    else:
        # Calculate SASA for a subset of structure or exclude waters
        if atoms is None:
            atoms = st.getAtomIndices()
            atoms_asl = "all"
        else:
            # In case the user passed an atom iterator:
            atoms = list(map(int, atoms))
            # If there are no atoms, return an empty list
            if not atoms:
                return []
            atoms_asl = generate_asl(st, atoms)

        # Calculate SASA for a subset

        # Create a bitset of the atom subset:
        atoms_bitset = Bitset.from_list(st.atom_total, atoms)

        # Create a bitset of the occlusion set:
        close_asl = "(within %s %s)" % (cutoff, atoms_asl)
        if exclude_water:
            close_asl += " and not water"
        close_atoms = evaluate_asl(st, close_asl)

        consider_atoms_bitset = Bitset.from_list(st.atom_total, close_atoms)

        sasa_list = _canvas.ChmMmctAdaptor.calculateSASAByAtom(
            st.handle, atoms_bitset.handle, consider_atoms_bitset.handle,
            probe_radius, resolution, False)

    return sasa_list


[docs]def calculate_sasa_by_residue(st,
                              atoms=None,
                              cutoff=8.0,
                              probe_radius=1.4,
                              resolution=0.2,
                              exclude_water=False):
    """
    Calculate the solvent-accessible surface area (SASA) for the whole
    structure, or an atom subset, and then group them by residue.

    :type st: `Structure`
    :param st: Structure for which SASA is desired.

    :type atoms: list
    :param atoms: List of atom indices or of
            `_StructureAtom objects<_StructureAtom>` for the atoms to count.  If
            None, calculates SASA for all atoms. (default: None)

    :type cutoff: float
    :param cutoff: Atoms within this distance of `atoms` will be considered for
            occlusion. Requires `atoms` to be specified. (default: 8.0A)

    :type probe_radius: float
    :param probe_radius: Probe radius, in units of angstroms, of the solvent
            molecule. (default: 1.4A)

    :type resolution: float
    :param resolution: Resolution to use. Decreasing this number will yield
            better results, increasing it will speed up the calculation. NOTE: This
            is NOT the same option as Maestro's surface resolution, which uses a
            different algorithm to calculate the surface area. (default: 0.2)

    :type exclude_water: bool
    :param exclude_water: If set to True then explicitly exclude waters in the
            method. This option is only works when 'atoms' argument is passed.
            (default: False)

    :rtype: list
    :return: a list of solvent accessible surface area of residues (ordered by
            connectivity) within `st`.
    """

    sasa_list = calculate_sasa_by_atom(st,
                                       atoms=atoms,
                                       cutoff=cutoff,
                                       probe_radius=probe_radius,
                                       resolution=resolution,
                                       exclude_water=exclude_water)
    if atoms is None:
        atoms = st.getAtomIndices()
    else:
        atoms = list(map(int, atoms))

    # now do all the accounting
    sasa_by_residue = []
    for res in get_residues_by_connectivity(st):
        sasa = 0.0
        res_matched = False
        for atom in res.atom:
            if atom.index in atoms:
                sasa += sasa_list[atom.index - 1]
                res_matched = True
        if res_matched:
            sasa_by_residue.append(sasa)
    return sasa_by_residue


[docs]def calculate_sasa(st,
                   atoms=None,
                   cutoff=8.0,
                   probe_radius=1.4,
                   resolution=0.2,
                   exclude_water=False,
                   exclude_atoms=None):
    """
    Calculate the solvent-accessible surface area (SASA) for the whole
    structure, or an atom subset.

    :type st: `Structure`
    :param st: Structure for which SASA is desired.

    :type atoms: list
    :param atoms: List of atom indices or of
            `_StructureAtom objects<_StructureAtom>` for the atoms to count.  If
            None, calculates SASA for all atoms. (default: None)

    :type cutoff: float
    :param cutoff: Atoms within this distance of `atoms` will be considered for
            occlusion. Requires `atoms` to be specified. (default: 8.0A)

    :type probe_radius: float
    :param probe_radius: Probe radius, in units of angstroms, of the solvent
            molecule. (default: 1.4A)

    :type resolution: float
    :param resolution: Resolution to use. Decreasing this number will yield
            better results, increasing it will speed up the calculation. NOTE: This
            is NOT the same option as Maestro's surface resolution, which uses a
            different algorithm to calculate the surface area. (default: 0.2)

    :type exclude_water: bool
    :param exclude_water: If set to True then explicitly exclude waters in the
            method. This option is only works when 'atoms' argument is passed.
            (default: False)

    :type exclude_atoms: list
    :param exclude_atoms: aid of atoms that you don't want in SASA caluclation
            useful for FEP-type systems where a second 'image' of a molecules is
            present

    :rtype: float
    :return: The solvent accessible surface area of the selected `atoms` within
            `st`.

    """

    # NOTE: the calculateSASA() function is defined here:
    # mmshare/canvaslibs_ext/mm/ChmMmctAdaptor.cpp
    #
    # The Phase library that is used by the above function is defined here:
    # mmshare/phaselibs/feature/PhpSASArea.cpp
    #

    lazy_canvas_import()

    if atoms is None:
        # Calculate SASA for the whole structure

        sasa = _canvas.ChmMmctAdaptor.calculateSASA(
            st.handle,
            probe_radius,
            resolution,
            False,  # excludeH=False - For consistency with Maestro
        )

    else:
        # In case the user passed an atom iterator:
        atoms = list(map(int, atoms))

        # Calculate SASA for a subset

        # Create a bitset of the atom subset:
        atoms_bitset = Bitset.from_list(st.atom_total, atoms)

        #TODO: need to explicitly exclude the other molecule if FEP system
        # Create a bitset of the occlusion set:
        atoms_asl = "(atom.num %s)" % ', '.join(map(str, atoms))
        close_asl = "(within %s %s)" % (str(cutoff), atoms_asl)
        if exclude_water:
            close_asl += " and not water"

        if exclude_atoms:
            close_asl += ' and not (atom.num %s)' % \
                    ', '.join(map(str, exclude_atoms))
        close_atoms = evaluate_asl(st, close_asl)

        if len(close_atoms) == len(atoms):
            newst = st.extract(atoms)
            sasa = _canvas.ChmMmctAdaptor.calculateSASA(newst.handle,
                                                        probe_radius,
                                                        resolution, False)
        else:
            consider_atoms_bitset = Bitset.from_list(st.atom_total, close_atoms)

            sasa = _canvas.ChmMmctAdaptor.calculateSASA(
                st.handle,
                atoms_bitset.
                handle,  # const int& mmbsSubsetAtomsCount,    //these are considered in sum
                consider_atoms_bitset.
                handle,  # const int& mmbsSubsetAtomsConsider, //these are considered for occlusion
                probe_radius,
                resolution,
                False)
    if math.isnan(sasa):
        sasa = 0.0
    return sasa


[docs]def calc_buried_sasa_by_residue(st, group1_atoms, group2_atoms, resolution=0.5):
    """
    Calculate the buried SASA ratio (delta SASA upon binding) for each
    residue, which measures how much of the residue's surface is interacting
    with the other binding partner.
    Value of 1.0 means that all of residue's area, as calculated in a subunit,
    is no longer accessible to solvent when in complex with the other group.
    Value of 0.0 means that all of that residue is accessible as a complex
    as well (residue is not on the interaction surface). Value of 0.0 is also
    used for residues that are fully buried in their subunits.

    :param st: Structure object
    :type st: `structure.Structure`

    :param group1_atoms: Atoms of the first binding partners group.
    :type group1_atoms: list of ints

    :param group2_atoms: Atoms of the second binding partners group.
    :type group2_atoms: list of ints

    :param resolution: Resolution to use. See calculate_sasa_by_atom().
    :type resolution: float

    :return: Dictionary where keys are residue strings (e.g. "A:123"), and
            values are the buried SASA ratio for that residue (0.0-1.0).
    :rtype: dict
    """

    # Initialize the output dict:
    buried_sasa_by_res = {str(res): 0.0 for res in st.residue}

    def calc_unbound_sasa(st, group_atoms):
        del_atoms = [
            ai for ai in range(1, st.atom_total) if ai not in group_atoms
        ]
        group_st = st.copy()
        renumber_map = group_st.deleteAtoms(del_atoms, renumber_map=True)
        inv_renumber_map = {new: orig for orig, new in renumber_map.items()}
        sasa_by_atom = calculate_sasa_by_atom(group_st, resolution=resolution)
        sasa_by_orig_atom = {}
        for new_anum, sasa in enumerate(sasa_by_atom, start=1):
            orig_anum = inv_renumber_map[new_anum]
            sasa_by_orig_atom[orig_anum] = sasa
        return sasa_by_orig_atom

    # Calculate the SASA in context of the whole structure/complex:
    all_sasa_by_atom = calculate_sasa_by_atom(st, resolution=resolution)

    # Calculate SASA for individual groups:
    group1_sasas = calc_unbound_sasa(st, group1_atoms)
    group2_sasas = calc_unbound_sasa(st, group2_atoms)

    unbound_sasa_by_atom = {
        anum: sasa for anum, sasa in list(group1_sasas.items()) +
        list(group2_sasas.items())
    }

    for res in st.residue:
        bound_sasa = 0.0
        unbound_sasa = 0.0
        for atom in res.atom:
            if atom.index in group1_atoms or atom.index in group2_atoms:
                bound_sasa += all_sasa_by_atom[atom.index - 1]  # 0-indexed
                unbound_sasa += unbound_sasa_by_atom[atom.index]  # 1-indexed

        if unbound_sasa == 0.0:
            # completely buried, can't interact with the other group
            ratio = 0.0
        else:
            ratio = 1.0 - (bound_sasa / unbound_sasa)
        buried_sasa_by_res[str(res)] = ratio
    return buried_sasa_by_res


[docs]def find_ligands(st, **kwargs):
    """
    Simple function interface for `AslLigandSearcher` class.

    :type st: `Structure`
    :param st: Structure to search.

    :rtype: list
    :return: a list of `Ligand` instances for putative ligands within st.
    """

    asl_searcher = AslLigandSearcher(**kwargs)
    return asl_searcher.search(st)


[docs]def center_of_mass(st, atom_indices: list = None):
    """
    Gets the structure's center of mass. If specified, this can be limited
    to a subset of atoms.

    NOTE: Periodic boundary conditions (PBC) are NOT honored.

    :param st: structure
    :type st: structure.Structure

    :param atom_list: 1-based atom indices
    :type atom_list: list(int)

    :return: centroid given as 3-element array [x, y, z]
    :rtype: numpy.array(float)

    See schrodinger/geometry/centroid.h
    """
    if atom_indices:
        bs = mmbitset.Bitset.from_list(st.atom_total, atom_indices)
        xyz = geometry.center_of_mass(st, bs)
    else:
        xyz = geometry.center_of_mass(st)
    # hack to fix wrapping and return [x, y, z]
    return xyz[:, 0]


[docs]def radius_of_gyration(
        st: "structure.Structure",  # noqa: F821
        atom_indices: Optional[List[int]] = None,
        mass_weighted: bool = False) -> float:
    """
    Calculate radius of gyration (R_gyr or Rg) is a measure of the size of an
    object of arbitrary shape.

    NOTE: Periodic boundary conditions (PBC) are NOT honored.
    :return: float value in Angstrom units
    """
    atom_indices = atom_indices or list(range(1, st.atom_total + 1))

    # Calculate the centroid
    com = numpy.array(center_of_mass(st, atom_indices=atom_indices))

    # Calculate the radius of gyration. R^2 = (1/N)*sum((r_i - r_mean)^2)
    xyz = numpy.array([st.atom[i].xyz for i in atom_indices])
    r2 = numpy.square(xyz - com).sum(axis=1)
    weights = [st.atom[i].atomic_weight for i in atom_indices] \
            if mass_weighted else [1] * len(atom_indices)
    # Calculate the sq distance between the atom and the centroid.
    r_gyr = numpy.sqrt(numpy.average(r2, weights=weights))
    return r_gyr


[docs]def calculate_principal_moments(struct=None, atoms=None, massless=False):
    """
    Calculate the principal moments of inertia for a list of atoms.
    This is calculated with respect to the x, y, and z
    coordinates of the atom's center of mass.

    :type struct: `Structure`
    :param struct: If given the moments will be calculated for the entire
        structure.  This overrides any atoms given with the atoms keyword.  Either
        atoms or structure must be given.

    :type atoms: list
    :param atoms: list of `schrodinger.structure._StructureAtom` objects.  Atom
            objects to compute the tensor for.  Either atoms or structure must be
            given.

    :type massless: bool
    :param massless: True if the calculations should be independent of the
        atomic masses (all mass=1), False (default) if atomic mass should be used.

    :rtype: tuple
    :return: A tuple of (eigenvalues, eigenvectors) of the inertial tensor.  The
        eigenvalues are the principle moments of inertia and are a list of length 3
        floats.  The eigenvectors are a list of lists, each inner list is a list of
        length 3 floats.
    """

    def get_distance(pt1, pt2):
        """
        Get the distance between two "points". The points need to
        be numpy arrays with 3 floats in each.

        :type pt1: 3 element `numpy array<numpy.ndarray>`
        :param pt1: first point
        :type pt2: 3 element `numpy array<numpy.ndarray>`
        :param pt2: second point
        :rtype: float
        :return: The distance between `pt1` and `pt2`
        """

        c = pt2 - pt1
        return numpy.sqrt(numpy.sum(c * c))

    if struct:
        atoms = [x for x in struct.atom]

    total_atoms = len(atoms)

    # First we calculate the center of mass. We do not use
    # a separate function for this since we do not want to
    # grab the atom properties more than once since this
    # significantly slows the process down
    atom_mass = numpy.zeros(total_atoms)
    atom_xyz = numpy.zeros((total_atoms, 3))

    for i in range(0, total_atoms):
        if massless:
            atom_mass[i] = 1
        else:
            atom_mass[i] = atoms[i].atomic_weight
        atom_xyz[i] = numpy.array(atoms[i].xyz)

    # Multiply each [x,y,z] by the mass of that atom
    mass_weighted = atom_xyz * atom_mass[:, numpy.newaxis]
    com = numpy.sum(mass_weighted, axis=0) / numpy.sum(atom_mass)

    shape = (3, 3)
    indices = numpy.ndindex(*shape)
    tensor = numpy.zeros(shape)

    for i, j in indices:
        delta = int(i == j)
        elem = numpy.zeros(total_atoms)
        for idx in range(0, total_atoms):
            vec = atom_xyz[idx] - com
            r2d = (get_distance(atom_xyz[idx], com)**2) * delta

            elem[idx] = atom_mass[idx] * (r2d - vec[i] * vec[j])

        tensor[i][j] = numpy.sum(elem)

    moments, vectors = numpy.linalg.eigh(tensor)
    moments = moments.tolist()
    vectors = vectors.tolist()
    axes = []
    for axis in range(3):
        axes.append([x[axis] for x in vectors])
    return moments, axes


[docs]def get_largest_moment_normalized_vector(**kwargs):
    """
    Return the normalized eigenvector of the largest moment of inertia. This
    will be the vector normal to the plane of the largest moment.
    See calculate_principal_moments for parameters.

    :rtype: `numpy.array`
    :return: The normalized vector for the largest moment of inertia
    """

    vals, vecs = calculate_principal_moments(**kwargs)
    eigvalvecs = list(zip(vals, vecs))
    largest_moment, largest_vector = max(eigvalvecs)
    ivec = numpy.array(largest_vector)
    inertial_vec = transform.get_normalized_vector(ivec)
    return inertial_vec


[docs]def find_shortest_bond_path(struct, index1, index2, atom_ids=None):
    """
    Find the shortest path of bonded atoms that connects atom1 to atom2

    The conversion of this routine to use networkx rather than scipy resulted in
    a dramatic reduction in both time and memory usage.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure containing the atoms index1 and index2

    :type index1: int
    :param index1: The index of the first atom in the path

    :type index2: int
    :param index2: The index of the second atom in the path

    :type atom_ids: the atom_ids to search path from
    :param atom_ids: list of int

    :rtype: list
    :return: A list of indexes of atoms that connect atom index1 to atom index2
        along the
        shortest bond path. Index1 will be the first item in the list and index2
        will be the last. The second item in the list will be bonded to index1, the
        third will be bonded to the second, etc. If index1 == index2, a single item
        list is returned: [index1]

    :raise ValueError: if index1 and index2 are not part of the same molecule

    :raise MemoryError: if the system is too large
    """

    if index2 == index1:
        return [index1]

    atom1 = struct.atom[index1]
    mol = atom1.getMolecule()
    if atom_ids is None:
        atom_ids = mol.getAtomIndices()

    if index2 not in atom_ids:
        # Manually raise a value error rather than go through the whole process
        # and have networkx raise a networkx.exception.NetworkXNoPath error
        raise ValueError('Both atoms must be part of the same molecule')

    nx_graph = networkx.Graph()
    for atom_id in atom_ids:
        atom = struct.atom[atom_id]
        for neighbor in atom.bonded_atoms:
            if neighbor.index > atom.index:
                nx_graph.add_edge(atom.index, neighbor.index)
    path = networkx.shortest_path(nx_graph, index1, index2)
    return path


[docs]def create_nx_graph(struct, atoms=None):
    """
    Generate a networkx undirected graph of the structure based on bonds

    :type struct: `schrodinger.structure.Structure`
    :param struct: the structure to create a graph of

    :type atoms: iterable
    :param atoms: Optionally, graph edges will be restricted to this group of
        atoms - items are `schrodinger.structure._StructureAtom` objects or atom
        indexes

    :rtype: `networkx.Graph`
    :return: An undirected graph of the structure with edges in place of each
        bond. Edges are identical regardless of the bond order of the bond.
    """

    if struct is None:
        raise RuntimeError('struct must be supplied to create a graph')

    nx_graph = networkx.Graph()
    if not atoms:
        for bond in struct.bond:
            nx_graph.add_edge(bond.atom1.index, bond.atom2.index)
    else:
        # Convert any indexes to atom objects and create a set
        atom_objs = set()
        for atom in atoms:
            atom_objs.add(struct.atom[int(atom)])

        for atom in atom_objs:
            aindex = atom.index
            for neighbor in atom.bonded_atoms:
                if neighbor in atom_objs and neighbor.index > aindex:
                    nx_graph.add_edge(aindex, neighbor.index)

    return nx_graph


[docs]def improper_dihedral_iterator(struct=None,
                               atoms=None,
                               nx_graph=None,
                               include_proper_improper=True,
                               include_proper=True):
    """
    An iterator over all the improper dihedral angles in a structure or
    group of atoms.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to find improper dihedrals in. Either
        struct or nx_graph must be given.

    :type atoms: iterable
    :param atoms: Optionally, improper dihedrals will be restricted to
        this group of atoms - items are `schrodinger.structure._StructureAtom`
        objects or atom indexes

    :type graph: `networkx.Graph`
    :param graph: A networkx graph of the structure with edges representing
        bonds. If not supplied one will be generated. If graph is supplied, struct
        and atoms are ignored.

    :type include_proper_improper: bool
    :param include_proper_improper: whether to include improper dihedrals that
        define the same degree-of-freedom defined by a proper dihedral obtained
        from torsion_iterator, for example in the digrams below the neighboring
        atoms are not bound and so the defined impropers offer new
        degrees-of-freedom but if they were bound (suppose (R'',12) was bound to
        (R',26) in the first diagram) the degree-of-freedom defined by the improper
        (7, 37, 12, 26) is redundant with that defined by the proper
        (7, 37, 12, 26) obtained from torsion_iterator, this boolean controls
        whether such impropers can be returned

    :type include_proper: bool
    :param include_proper: whether to include proper dihedrals obtained from
        torsion_iterator that define a new degree-of-freedom not defined by any
        improper dihedral, for example in the digram below suppose (R'',12) was
        bound to (R',26) and the bond between (X,37) and (R',26) did not exist,
        this boolean controls whether such propers can be returned as impropers

    :rtype: tuple
    :return: Each iteration yields a 4-integer tuple of atom indexes for an
        improper dihedral.  For each given quadruple all unique topologies are
        enumerated.  For example, for a standard quadruple ordering (i,j,k,l)
        all 6 of the following topologies are returned:
        (1) (i,j,k,l)
        (2) (j,i,k,l) # switch 1,2
        (3) (i,j,l,k) # switch 3,4
        (4) (j,i,l,k) # switch 1,2 and 3,4
        (5) (k,j,i,l) # switch 1,3
        (6) (i,l,k,j) # switch 2,4
        The standard quadruple ordering (i,j,k,l) considers the indices of the
        first three atoms as the central atom plus the two lowest index bonding
        atoms in bond-path ordering as lowest bonding atom, central atom, other
        bonding atom.  The last index in the quadruple is the remaining atom
        which is not bonded to the last atom in the previously mentioned triple
        but is bonded to the central atom.  If calling with include_proper then
        those quadruples do not have their topologies enumerated and the ordering
        will be such that the index of the first atom in the tuple will be
        smaller than the index of the last atom in the tuple.

    For example::

        (R,7)
          \
           (X,37)-(R',26)
          /
        (R'',12)

    (1) (7, 37, 12, 26) (standard)
    (2) (37, 7, 12, 26)
    (3) (7, 37, 26, 12)
    (4) (37, 7, 26, 12)
    (5) (12, 37, 7, 26)
    (6) (7, 26, 12, 37)

    or::

        (R,34)
          \
           (X,1)-(R',4)
          /
        (R'',78)

    (1) (4, 1, 34, 78) (standard)
    etc.
    """

    # for coarse-grain structures bonding patterns can be odd
    # relative to an atomistic viewpoint and so be general,
    # for N atoms bonded to a central atom the number of unique
    # improper dihedrals is the binomial coefficient N-choose-3
    # or (N 3) = N!/((N-3)!3!), if not include_proper_improper then
    # skip cases where neighbors bonded to the same central atom are
    # also bonded to each other as those degrees-of-freedom are
    # covered with normal proper torsions (see torsion_iterator),
    # with the function name improper_dihedral_iterator running with
    # include_proper is technically confusing as such proper dihedrals
    # are not at all related to improper dihedrals but offered here
    # anyway due to reasons discussed in MATSCI-4891

    if nx_graph is None:
        nx_graph = create_nx_graph(struct, atoms=atoms)

    get_topologies = lambda i, j, k, l: [(i, j, k, l), (j, i, k, l),
                                         (i, j, l, k), (j, i, l, k),
                                         (k, j, i, l), (i, l, k, j)]

    for ind2 in nx_graph.nodes():
        neighbors = list(nx_graph.neighbors(ind2))
        if len(neighbors) >= 3:
            for triple in itertools.combinations(neighbors, 3):
                redundant = False
                if not include_proper_improper:
                    for pair in itertools.combinations(triple, 2):
                        if nx_graph.has_edge(*pair):
                            redundant = True
                            break
                if not redundant:
                    ind1, ind3, ind4 = sorted(triple)
                    for quadruple in get_topologies(ind1, ind2, ind3, ind4):
                        yield quadruple

    # nothing is done here to ensure that the quadruples yielded here
    # have not already been yielded as cases where the degree-of-freedom
    # could be described as either improper or proper, so it is the
    # callers responsibility to dedup if necessary
    if include_proper:
        for quadruple in torsion_iterator(nx_graph=nx_graph):
            yield quadruple


[docs]def torsion_iterator(struct=None, atoms=None, nx_graph=None):
    """
    An iterator over all the bonded torsions in a structure or group of atoms.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to find torsions in. Either struct or nx_graph
        must be given.

    :type atoms: iterable
    :param atoms: Optionally, torsions will be restricted to this group of atoms
        - items are `schrodinger.structure._StructureAtom` objects or atom indexes

    :type graph: `networkx.Graph`
    :param graph: A networkx graph of the structure with edges representing
        bonds. If not supplied one will be generated. If graph is supplied, struct
        and atoms are ignored.

    :rtype: tuple
    :return: Each iteration yields a 4-integer tuple of atom indexes for a
        dihedral formed by bonded atoms. The index of the first atom in the tuple
        will be smaller than the index of the last atom in the tuple.
    """

    if nx_graph is None:
        nx_graph = create_nx_graph(struct, atoms=atoms)

    # In testing over molecules from 12 to 161,000 atoms, this networkx
    # implementation was consistently 3X faster than a similar ct-based
    # algorithm

    for ind2, ind3 in nx_graph.edges():
        # nx_graph[x] returns all the nodes attached to x
        for ind1 in nx_graph[ind2]:
            if ind1 != ind3:
                for ind4 in nx_graph[ind3]:
                    # 4 != 1 avoids three-membered rings
                    if ind4 != ind2 and ind4 != ind1:
                        if ind1 < ind4:
                            yield (ind1, ind2, ind3, ind4)
                        else:
                            yield (ind4, ind3, ind2, ind1)


[docs]def angle_iterator(struct=None, atoms=None, nx_graph=None):
    """
    An iterator over all the bonded angles in a structure or group of atoms.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to find angles in. Either struct or nx_graph
        must be given.

    :type atoms: iterable
    :param atoms: Optionally, angles will be restricted to this group of atoms -
        items are `schrodinger.structure._StructureAtom` objects or atom indexes

    :type graph: `networkx.Graph`
    :param graph: A networkx graph of the structure with edges representing
        bonds. If not supplied one will be generated. If graph is supplied, struct
        and atoms are ignored.

    :rtype: tuple
    :return: Each iteration yields a 3-integer tuple of atom indexes for an
        angle formed by bonded atoms. The index of the first atom in the tuple
        will be smaller than the index of the last atom in the tuple.
    """

    if nx_graph is None:
        nx_graph = create_nx_graph(struct, atoms=atoms)

    # See torsion_iterator for a comment on the speed of using networkx vs
    # ct-based algorithm

    for ind2 in nx_graph.nodes():
        neighbors = nx_graph.neighbors(ind2)
        for ind1, ind3 in itertools.combinations(neighbors, 2):
            if ind1 < ind3:
                yield (ind1, ind2, ind3)
            else:
                yield (ind3, ind2, ind1)


[docs]def bond_iterator(struct=None, atoms=None, nx_graph=None):
    """
    An iterator over all the bonds in a structure or group of atoms.

    Note: It may seem unnecessary to have this function as one must iterate over
    bonds to form the nx_graph that is then iterated over in this function.
    However, it may be the case that an nx_graph has already been created for
    other reasons such as when iterating over all bonds, angles and torsions.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to find bonds in. Either struct or nx_graph
        must be given.

    :type atoms: iterable
    :param atoms: Optionally, bonds will be restricted to this group of atoms -
        items are `schrodinger.structure._StructureAtom` objects or atom indexes

    :type graph: `networkx.Graph`
    :param graph: A networkx graph of the structure with edges representing
        bonds. If not supplied one will be generated. If graph is supplied, struct
        and atoms are ignored.

    :rtype: tuple
    :return: Each iteration yields a 2-integer tuple of atom indexes for a bond
        formed by bonded atoms. The index of the first atom in the tuple will be
        smaller than the index of the last atom in the tuple.
    """

    if nx_graph is None:
        nx_graph = create_nx_graph(struct, atoms=atoms)

    # See torsion_iterator for a comment on the speed of using networkx vs
    # ct-based algorithm

    for ind1, ind2 in nx_graph.edges():
        if ind1 < ind2:
            yield (ind1, ind2)
        else:
            yield (ind2, ind1)


[docs]def get_average_structure(sts):
    """
    Calculate the average structure between the given conformers.

    :type sts: Iterable of `structure.Structure` objects
    :param sts: Structures to average

    :rtype: `structure.Structure`
    :return: Average structure
    """

    coord_sums = None
    average_st = None

    for i, st in enumerate(sts, start=1):
        st_coords = st.getXYZ()
        if i == 1:
            coord_sums = st_coords
            average_st = st.copy()
        else:
            if st.atom_total != average_st.atom_total:
                raise ValueError("Input structures must be conformers")
            coord_sums = numpy.sum((coord_sums, st_coords), axis=0)

    if coord_sums is None:
        raise ValueError("Input sts can not be empty")

    average_coords = numpy.divide(coord_sums, i)
    average_st.setXYZ(average_coords)

    return average_st


[docs]def find_common_substructure(sts, atomTyping=11, allow_broken_rings=True):
    """
    Find the maximum substructure that is common between all specified CTs.
    If any of the structures matches the substructure SMARTS more than once,
    then all matches are reported - that is why output a "triple" list. Outer
    list represents input structure, next list represents matches, and inner
    list is list of atom indices for that match.
    It's up to the calling code to decide which of the multiple matches to
    use (one method is to use the one whose center-of-mass is closest to the
    COM of the whole ligand).
    NOTE: This function becomes exponentioally slow with larger number of
    structures. Recommened maximum around 30 structures.

    NOTE: This function checks CANVAS_SHARED exists and checks out CANVAS_FULL

    :type sts: Iterable of `structure.Structure` objects
    :param sts: Structures to average

    :type atomTyping: int
    :param atomTyping: Atom typing scheme to use. For list of available
            schemes, see $SCHRODINGER/utilities/canvasMCS -h

    :type allow_broken_rings: bool
    :param allow_broken_rings: Whether to allow partial mapping of rings

    :rtype: List of list of list of ints
    :return: Substructure atoms from each structure. Outer list represents
            input structures - in order of input; middle list represents matches,
            inner list represents atom indices for that match.
    """

    settings = canvas.MCSsettings()
    settings.atomTyping = atomTyping
    if not allow_broken_rings:
        settings.nobreakring = True
        settings.nobreakaring = True

    adaptor = canvas.ChmMmctAdaptor()

    mols = [adaptor.create(st.handle) for st in sts]

    out_dict = collections.OrderedDict()

    for item in canvas.runMCS(mols, settings):
        match_atoms = list(item.mapAtoms)
        try:
            out_dict[item.molID].append(match_atoms)
        except KeyError:
            out_dict[item.molID] = [match_atoms]

    assert list(out_dict) == list(range(len(mols)))

    return list(out_dict.values())


[docs]def group_by_connectivity(st, atoms):
    """
    Groups the atoms by molecule connectivity. Returns a list of atom groups.
    Each group is a list of atoms that are in the same "molecule" -
    that are bonded to each other, counting only atoms in specified list.
    If multiple atoms are in the same molecule, but are separated by atoms
    that are not in the list (e.g. 2 covalent ligands bound to same protein),
    they will be grouped separately.

    :param st: Structure that atoms are from.
    :type st: `structure.Structure`

    :param atoms: List of atom indices that are to be grouped.
    :type atoms: list of ints
    """
    unassigned_atoms = atoms[:]

    def walk_the_atom(atom, group_atoms, checked_atoms):
        ai = atom.index
        group_atoms.add(ai)
        for bond in atom.bond:
            if bond.order == 0:
                continue
            natom = bond.atom2
            nindex = natom.index
            if nindex in checked_atoms:
                continue
            checked_atoms.add(nindex)
            if nindex in unassigned_atoms:
                unassigned_atoms.remove(nindex)
                walk_the_atom(natom, group_atoms, checked_atoms)

    groups = []
    while unassigned_atoms:
        # Take the next unassigned atom, and find all atoms in the same
        # "molecule group" as that atom:
        aindex = unassigned_atoms[0]
        unassigned_atoms.remove(aindex)
        checked_atoms = {aindex}
        group_atoms = set()
        walk_the_atom(st.atom[aindex], group_atoms, checked_atoms)
        groups.append(sorted(group_atoms))

    return groups


[docs]def find_common_properties(sts: Iterable[Structure]) -> Set[str]:
    """
    Return a set of property names that are common to all selected structures.

    :type sts: Iterable of `structure.Structure` objects
    :param sts: Structures to analyze

    :return: set of property data names
    :rtype: set of str
    """
    common_props = None
    for st in sts:
        st_props = set(list(st.property))
        if common_props is None:
            common_props = st_props
        else:
            # Exclude properties that are not present in this row
            common_props.intersection_update(st_props)
        if not common_props:
            break  # No common properties - stop the search

    if not common_props:
        # No input structures
        return set()

    return common_props


[docs]def read_seqres_from_ct(in_ct):
    """                                                                                                                                                                           ct {schrodinger.Structure} Input ct to process
    Read the SEQRES data from a ct and return as a pair of lists
    with the same size.  The first has the chain names and the second the
    sequences
    ie ['A'] and ['ALA ALA ALA ']
    """
    # This is necessary as the actions here adjust the ct in some unknown way
    ct = in_ct.copy()
    # Get the PDB sequence strings for each chain:
    try:
        data_handle = mm.mmct_ct_m2io_get_unrequested_handle(ct)
    except mm.MmException:
        data_handle = mm.mmct_ct_get_or_open_additional_data(ct, True)
    num_seqres_blocks = mm.m2io_get_number_blocks(data_handle, "m_PDB_SEQRES")
    if num_seqres_blocks == 0:
        return [], []

    # get sequences from SEQRES chain:seqres
    out_chains = []
    out_seqres = []
    mm.m2io_goto_block(data_handle, "m_PDB_SEQRES", 1)
    num_rows = mm.m2io_get_index_dimension(data_handle)
    for i in range(1, num_rows + 1):
        chain_strings = mm.m2io_get_string_indexed(data_handle, i,
                                                   ["s_pdb_chain_id"])
        seqres_strings = mm.m2io_get_string_indexed(data_handle, i,
                                                    ["s_pdb_SEQRES"])
        out_chains.append(chain_strings[0])
        out_seqres.append(seqres_strings[0])
    return out_chains, out_seqres


[docs]def seq_align_match(fullseq,
                    fragseq,
                    pdbnum,
                    breaklist=None,
                    allow_frag_gaps=False):
    """
    restricted Needleman-Wunsch

    :param fullseq: Full sequence to work with.  Positions in the alignment
       that match this and not fragseq are given light penalities. Positions in
       the alignment that fragseq and not this are either not allowed
       (allow_frag_gaps=True) or have large penalties. This is intended to be
       the full protein sequence (from the seqres records) when alignining
       protein full protein sequences to those actually resolved in the
       experiment.
    :type fullseq: str

    :param fragseq: Fragment sequence to work with. This is intended to be the
        fragment of the protein sequence actually resolved in the experiment
        (ATOM records) when aligning full protein sequences to those actually
        resolved in the experiment.
    :type fragseq: str

    :param pdbnum: list of tuples with a integer and a character with the same
        length as fragseq ie [(1, ' ', (1, 'A'), (2, ' ')]} The residue numbers
        and insertion codes of the residues in fragseq. This allows for the gap
        penalties to be disregarded when the residue number suggests a gap.
    :type pdbnum: list(tuple)

    :param breaklist: { list of Boolean with the same length as fragseq } True
        values in this list mean that there is a known break after that residue
        in fragseq so gap penalties are disregarded.
    :type breaklist: list(bool)

    :param allow_frag_gaps: see fullseq for a description
    :type  allow_frag_gaps: bool

    Return value is a string with the same length as the alignment. A M will be
    at any position that matches the fullseq and fragseq. A U will be at any
    position that mathces fullseq, but not fragseq. A R will be at any position
    that mathces fragseq, but not fullseq.
    """
    lfull = len(fullseq)
    lfrag = len(fragseq)
    if lfrag == 0:
        raise RuntimeError('Zero-length sequence in alignment')
    if lfull - lfrag < 0 and not allow_frag_gaps:
        raise RuntimeError('Full sequence must be greater or equal to fragment')
    score = numpy.zeros((lfull, lfrag))
    trace = numpy.zeros((lfull, lfrag))
    # Fill up the matrix
    for ifull, cfull in enumerate(fullseq):
        for ifrag, cfrag in enumerate(fragseq):
            # If there is a match (no mismatch allowed?)
            is_match = False
            if (ifull == 0 or ifrag == 0):
                score_match = 0
            else:
                score_match = score[ifull - 1, ifrag - 1]
            if (cfull == cfrag and cfull != 'X'):
                score_match += 10
                is_match = True
            elif (cfull == 'X' and cfrag == 'X'):
                score_match += 0
            else:
                # The mismatch score should be HIGH so that we don't
                # Allow many prefer a pretty big gap over a mismatch
                score_match += -50
            # Gaps in frag are only allowed for 'X'
            if cfrag == 'X' and allow_frag_gaps:
                if ifrag == 0:
                    score_gap_frag = 0
                else:
                    score_gap_frag = score[ifull, ifrag - 1] - 5
            else:
                score_gap_frag = None
            # If there is a gap in the full (no gaps allowed in frag)
            if (ifull == 0):
                score_gap_full = 0
                gap_open_penalty_full = 0
            else:
                score_gap_full = score[ifull - 1, ifrag]
                # Deal with gap penalties
                if (trace[ifull - 1, ifrag] == 1):
                    if (ifrag >= len(fragseq) - 1):
                        # There' no penalty for opening a gap at the
                        # end of the fragment
                        gap_open_penalty_full = 0
                    elif (breaklist and not breaklist[ifrag + 1]):
                        # Skip the gap open penalty if we know there
                        # should be a gap here.
                        gap_open_penalty_full = 0
                    else:
                        # Default gap penalty is 40
                        gap_open_penalty_full = 40
                        # If the residue numbers on either side of the gap
                        # are contigious set up a gap-open penalty of 30
                        # otherwise set a gap open penalty of 40 to favor
                        # alignments that match the residue numbering scheme
                        try:
                            if (pdbnum[ifrag + 1][0] - pdbnum[ifrag][0] > 1):
                                gap_open_penalty_full = 30
                            # If the pdb numbers are not given in the input
                            # (TypeError on the minus) or we are at the end of
                            # the list (IndexError) then use the bigger penalty
                        except (TypeError, IndexError):
                            pass
                else:
                    # not the start of a gap
                    gap_open_penalty_full = 0
            score_gap_full = score_gap_full - gap_open_penalty_full
            if (score_match >= score_gap_full and
                (score_gap_frag is None or score_match >= score_gap_frag)):
                score[ifull, ifrag] = score_match
                trace[ifull, ifrag] = 1
            elif (score_gap_frag is None or score_gap_full >= score_gap_frag):
                score[ifull, ifrag] = score_gap_full
                trace[ifull, ifrag] = 2
            else:
                score[ifull, ifrag] = score_gap_frag
                trace[ifull, ifrag] = 3
    # Start from the bottom of the matrix and fill out the tracebacks
    map = []
    ifull = len(fullseq) - 1
    ifrag = len(fragseq) - 1
    while (ifull >= 0 and ifrag >= 0):
        if (trace[ifull, ifrag] == 1):
            map.append("M")
            ifull -= 1
            ifrag -= 1
        elif (trace[ifull, ifrag] == 2):
            map.append("U")
            ifull -= 1
        elif (trace[ifull, ifrag] == 3):
            map.append("R")
            ifrag -= 1
        else:
            raise RuntimeError("Malformed matrix")
    while (ifull >= 0):
        map.append("U")
        ifull -= 1
    while (ifrag >= 0):
        map.append("R")
        ifrag -= 1
    map.reverse()
    return map, numpy.max(score)


################################################################################
# Classes
################################################################################


[docs]class AslLigandSearcher(object):
    """
    Search a `Structure` instance for putative ligands
    with an Atom Selection Language expression.  Results are returned
    as a list of `Ligand` instances.

    API example::

        st = structure.Structure.read('file.mae')
        st_writer = structure.StructureWriter('out.mae')
        asl_searcher = AslLigandSearcher()
        ligands = asl_searcher.search(st)
        for lig in ligands:
            st_writer.append(lig.st)
        st_writer.close()

    ASL evaluates molecules in a strict sense.  Ligands with zero-order
    bonds to metal and covalently-attached ligands are difficult to find
    with this naive approach.  See __init__ for options that workaround
    these limitations.

    'sidechain', 'backbone', and 'ion' aliases are used by this module.
    They are taken from first mmasl.ini in the path, but are assumed to
    be defined as a list of PDB atom names that correspond to atoms of the
    protein side chains, protein backbone, and small ions respectively.

    Since the precise definition of a ligand is context specific and
    impossible to generally formulate, this class attempts to provide
    customizable tools for identifying ligands within a structure.
    It is the caller's responsibility to customize the search parameters
    and verify that the hits are appropriate.

    For all keyword args, the configured value from Maestro will be used by
    default. Only specify a keyword arg to override this value.

    All kwargs (except copy_props) are passed directly to LigandParameters,
    as defined in the LigandParamaters class in mmasl.h

    :ivar copy_props:
            If True then copy the ct-level properties from the searched
            structure to all the found ligand substructures.  If False, only
            the title will be copied.
    :vartype copy_props: bool

    :ivar min_heavy_atom_count:
            Minimum number of heavy atoms required in each ligand molecule.
    :vartype min_heavy_atom_count: int

    :ivar max_atom_count:
            Maximum number of heavy atoms for a ligand molecule (does not
            include hydrogens).
    :vartype max_atom_count: int

    :ivar allow_ion_only_molecules:
            Consider charged molecules to be ligands.
    :vartype allow_ion_only_molecules: bool

    :ivar allow_amino_acid_only_molecules:
            If True, consider small molecules containing only amino acids
            to be ligands.
    :vartype allow_amino_acid_only_molecules: bool

    :ivar excluded_residue_names:
            Set of PDB residue names corresponding to atoms which will never
            be ligands.
    :vartype excluded_residue_names: set[str]

    :ivar included_residue_names:
            Set of PDB residue names corresponding to atoms which always be
            considered ligands.
    :vartype included_residue_names: set[str]

    :see: `find_ligands` for a simple functional interface to this class.
    """

[docs]    def __init__(self, copy_props=True, **kwargs):
        """
        Initialize searcher.
        """
        self.copy_props = copy_props
        self.override_params = {}
        valid_params = [
            attr for attr in dir(mm.get_ligand_parameters())
            if not attr.startswith('_')
        ]
        for param, value in kwargs.items():
            if param not in valid_params:
                msg = 'Parameter {} is not a valid ligand parameter. Valid parameters are: {}'
                raise ValueError(msg.format(param, valid_params))
            self.override_params[param] = value

    """
    Properties: setting LigandParameters via this interface is deprecated.
    """

    @property
    def min_atom_count(self):
        return self._getLigandParameters().min_heavy_atom_count

    @min_atom_count.setter
    def min_atom_count(self, value):
        warnings.warn('min_atom_count is deprecated.',
                      DeprecationWarning,
                      stacklevel=2)
        self.override_params['min_heavy_atom_count'] = value

    @property
    def max_atom_count(self):
        return self._getLigandParameters().max_atom_count

    @max_atom_count.setter
    def max_atom_count(self, value):
        warnings.warn('max_atom_count is deprecated.',
                      DeprecationWarning,
                      stacklevel=2)
        self.override_params['max_atom_count'] = value

    @property
    def exclude_ions(self):
        return not self._getLigandParameters().allow_ion_only_molecules

    @exclude_ions.setter
    def exclude_ions(self, value):
        warnings.warn('exclude_ions is deprecated.',
                      DeprecationWarning,
                      stacklevel=2)
        self.override_params['allow_ion_only_molecules'] = not value

    @property
    def exclude_amino_acids(self):
        return not self._getLigandParameters().allow_amino_acid_only_molecules

    @exclude_amino_acids.setter
    def exclude_amino_acids(self, value):
        warnings.warn('exclude_amino_acids is deprecated.',
                      DeprecationWarning,
                      stacklevel=2)
        self.override_params['allow_amino_acid_only_molecules'] = not value

    @property
    def excluded_residues(self):
        return self._getLigandParameters().excluded_residue_names

    @excluded_residues.setter
    def excluded_residues(self, value):
        warnings.warn('excluded_residues is deprecated.',
                      DeprecationWarning,
                      stacklevel=2)
        self.override_params['excluded_residue_names'] = set(value)

    def _getLigandParameters(self):
        parameters = mm.get_ligand_parameters()
        for k, v in self.override_params.items():
            setattr(parameters, k, v)
        return parameters

[docs]    def search(self, st):
        """
        Find list of putative ligands matching either `ligand_asl` or the
        default internally generated ASL.

        :type st: `Structure`
        :param st: Structure to search for ligands.

        :rtype: list
        :return: a list of `Ligand` instances.  These are putative ligands
                 that match the ASL expression.  See `Ligand` for attributes.
        """

        ligand_mols = mm.find_ligand_molecules(st, self._getLigandParameters())
        return self._extract_ligands(st, ligand_mols)

    def _extract_ligands(self, st, ligand_mol_atoms):
        ligands = []

        # For each ligand, extract a substructure and
        # create a Ligand instance to contain information about it.
        for lig_atoms in ligand_mol_atoms:
            molecule_number = st.atom[lig_atoms[0]].molecule_number
            lig_st = st.extract(lig_atoms)
            if self.copy_props:
                lig_st.property.update(st.property)

            # PYTHON-1757: Generate the ASL with respect to the original
            # input structure:
            lig_asl = generate_asl(st, lig_atoms)

            covalent = len(lig_atoms) < len(st.molecule[molecule_number].atom)

            lig = Ligand(st, lig_st, molecule_number, lig_atoms, lig_asl,
                         covalent)
            ligands.append(lig)

        lig_count = len(ligands)
        msg = 'AslLigandSearcher.search basic search found %d' % lig_count
        logger.debug(msg)
        return ligands


[docs]class Ligand(object):
    """
    A putative `AslLigandSearcher` ligand structure with read-only data and
    convenience methods.

    `Ligand` items sort from smallest to largest, by total number of
    atoms, then by SMILES.

    Parameters:
    * complex_st: Original complex structure.
    * st: Ligand substructure
    * mol_num: Ligand molecule number in the original structure
    NOTE: molecule contains non-ligand atoms for covalently bound ligands.
    * atom_indexes: Atom indices into the original structure for this ligand.
    * atom_objects: List of ligand atom objects from the original structure.
    * lig_asl: ASL that matches the ligand atoms in the original structure.
    * is_covalently_bound: Whether the ligand is covalently bound. Depreacted.
    * pdbres: PDB residue name identifier.
    * centroid: Centroid of ligand as a 4-element numpy array: [x, y, z, 0.0]
    * unique_smiles: SMILES string representing this ligand structure.
    """

[docs]    def __init__(self,
                 complex_st,
                 st,
                 mol_num=None,
                 atom_indexes=None,
                 lig_asl=None,
                 is_covalently_bound=None):
        """

        :type st: `Structure`
        :param st: Original complex structure.

        :type st: `Structure`
        :param st: Ligand structure.

        :type mol_num: int
        :param mol_num:
                Molecular index identifier.  Typically, the mol.n from the
                original structure from whence this ligand structure was
                derived.  Note, depending on the nature of the ligand and
                the treatment of the original structure this mol.n index
                may not be valid.

        :type atom_indexes: list
        :param atom_indexes:
                Atom index identifiers.  Typically, the at.n from the original
                structure from whence this ligand structure was derived.

        :type lig_asl:  str
        :param lig_asl:
                ASL identifier.  Typically, the expression is defined in
                terms of the original structure from whence this ligand
                structure was derived.

        :deprecated is_covalently_bound: Whether this ligand is bonds to
                other atoms (including zero-order bonds). Will be False if the
                ligand spans a whole molecule.
        """

        self.complex_st = complex_st
        self._st = st
        self._mol_num = mol_num
        self._atom_indexes = atom_indexes
        self._lig_asl = lig_asl
        self._centroid = None
        self._smiles = None
        self._resname = None
        self._is_covalently_bound = is_covalently_bound
        return

    @property
    def is_covalently_bound(self):
        """
        The Ligand.is_covalently_bound property returns True if this ligand
        has any bonds (including zero-order) to any other atoms, and returns
        False if the ligand spans a complete molecule.
        """
        return self._is_covalently_bound

[docs]    def sort_key(self):
        """
        Enable sorting for `Ligand` objects:

            ligands.sort(key=lambda l: l.sort_key())

        Comparison criteria for sorting Ligands: total number of atoms, unique
        smiles string, centroid.

        :return: sort key
        :rtype: list
        """
        sort_items = [self.st.atom_total, self.unique_smiles]
        sort_items.extend(self.centroid.flatten())
        return sort_items

    def __str__(self):
        """
        :rtype: str
        :return: the unique SMILES string for this ligand.
        """

        return self.unique_smiles

    # Below are a slew of functions with @property decorators that
    # create read-only attributes.
    @property
    def mol_num(self):
        """
        Ligand's molecule number as defined upon instantiation.
        :rtype: int

        Warning: Depending on the nature of the ligand and the treatment of the
        original structure, e.g. zero-order bonds cut, this mol.n index may
        not be valid.
        """
        return self._mol_num

    @property
    def atom_indexes(self):
        """
        Indices of the Ligand atoms as defined upon instantiation.
        :rtype: list
        """
        return self._atom_indexes

    @property
    def atom_objects(self):
        """
        Atom objects from the original structure for the ligand atoms.
        :rtype: list
        """
        return [self.complex_st.atom[anum] for anum in self._atom_indexes]

    @property
    def pdbres(self):
        """
        PDB residue name identifier.  If the ligand is composed
        of multiple residues then the names are joined with a '-'
        separator.
        :rtype: str
        """

        if self._resname is None:
            resname = []
            for at in self._st.atom:
                pdbres = at.pdbres
                if pdbres not in resname:
                    resname.append(pdbres)
            self._resname = "-".join(resname)

        return self._resname

    @property
    def centroid(self):
        """
        Centroid of the Ligand as a 4-element numpy array: [x, y, z, 0.0]
        :rtype: 4-element `numpy array<numpy.ndarray>`
        """

        if self._centroid is None:
            self._centroid = transform.get_centroid(self._st)

        return self._centroid

    @property
    def unique_smiles(self):
        """
        Unique SMILES string representing this ligand structure.
        :rtype: str
        """

        if self._smiles is None:
            self._smiles = generate_smiles(
                self._st, unique=True, stereo=STEREO_FROM_ANNOTATION_AND_GEOM)
        return self._smiles

    @property
    def st(self):
        """
        Copy of the ligand `Structure`.
        :rtype: `Structure`
        """

        return self._st.copy()

    @property
    def ligand_asl(self):
        """
        Ligand_asl used when searching for the ligand.  The ASL defined the
        ligand in the context of its original structure.
        :rtype: str
        """

        return self._lig_asl


[docs]class MissingLoopFinder(object):
    """
    compare SEQRES and ATOM record and find missing loops.  Does not use the
    order of the residues in the CT to avoid issues that occur when missing
    loops are searched for after some loops have been added by another program
    """

[docs]    def __init__(self):
        pass

[docs]    def run(self, ct, include_tails=False, legacy_output=False, debug=False):
        """
        Compare the SEQRES records the structual atom records to find missing
        loops.

        :returns: tuples of (<residue object of the residue before the missing
            loop or NONE if this is a missing N-terminal tail>, <residue object
            of the residue after the missing loop or NONE if this is a missing
            C-terminal tail>, <list of residue types missing as a list of 3char
            strings>

        :param debug: is True then the allignments will be printed to stdout
        :type debug: bool

        :param include_tails: if True then missing N and C terminal tails will
            be included
        """
        # Get the SEQRES data from the ct
        seqres_chains, seqres_sequences = read_seqres_from_ct(ct)
        seqres_sequences = [[
            one_seq[i:i + 3] for i in range(0, len(one_seq), 4)
        ] for one_seq in seqres_sequences]
        res_for_seqres_sequences = [
            [None for res in one_seq] for one_seq in seqres_sequences
        ]
        ct_chains, ct_sequences = self._getCTSequences(ct)
        used_ct_sequence = [False for one_seq in ct_sequences]
        # Start doing alignments
        while True:
            # Find the best score
            best_map = None
            for ict, (one_ct_chain,
                      one_ct_sequence) in enumerate(zip(ct_chains,
                                                        ct_sequences)):
                if used_ct_sequence[ict]:
                    continue
                # Skip if all of the residues are nonstandard
                if not numpy.any([
                        mm.mmct_is_standard_residue(res.pdbres)
                        for res in one_ct_sequence
                ]):
                    continue
                one_ct_pdbname = [res.getCode() for res in one_ct_sequence]
                one_ct_pdbnum = [res.resnum for res in one_ct_sequence]
                for iseqres, (one_seqres_chain, one_seqres_sequence) in \
                        enumerate(zip(seqres_chains, seqres_sequences)):
                    if one_seqres_chain != one_ct_chain:
                        continue
                    one_seqres_pdbname = [
                        RESIDUE_MAP_3_TO_1_LETTER.get(pdbres, "X")
                        for pdbres in one_seqres_sequence
                    ]
                    for i in range(len(res_for_seqres_sequences[iseqres])):
                        if res_for_seqres_sequences[iseqres][i] is not None:
                            one_seqres_pdbname[i] = 'X'
                    map, map_score = seq_align_match(one_seqres_pdbname,
                                                     one_ct_pdbname,
                                                     one_ct_pdbnum,
                                                     allow_frag_gaps=True)
                    if best_map is None or best_map[1] < map_score:
                        best_map = (map, map_score, ict, iseqres)
            if best_map is None or best_map[1] <= 0:
                break
            # Mark these residues as used
            map, map_score, ict, iseqres = best_map
            used_ct_sequence[ict] = True
            ires = 0
            iseqres_res = 0
            for i in range(len(map)):
                if map[i] == 'M':
                    res_for_seqres_sequences[iseqres][iseqres_res] = \
                        ct_sequences[ict][ires]
                if map[i] in ['M', 'R']:
                    ires += 1
                if map[i] in ['M', 'U']:
                    iseqres_res += 1
        missing_loops = []
        for chain, sequence, residues in zip(seqres_chains, seqres_sequences,
                                             res_for_seqres_sequences):
            first_res_missing_loop = None
            missing_loop_sequence = []
            for isr, (one_seq, one_res) in enumerate(zip(sequence, residues)):
                if one_res is None:
                    if not missing_loop_sequence:
                        if isr == 0:
                            first_res_missing_loop = None
                        else:
                            first_res_missing_loop = residues[isr - 1]
                    missing_loop_sequence.append(one_seq)
                    res_name = "MISSING"
                else:
                    if missing_loop_sequence:
                        missing_loops.append((first_res_missing_loop, one_res,
                                              missing_loop_sequence))
                        missing_loop_sequence = []
                    res_name = "%s:%d%s(%s)" % (one_res.chain, one_res.resnum,
                                                one_res.inscode, one_res.pdbres)
                if debug:
                    print("%s: %3s: %s" % (chain, one_seq, res_name))
            if missing_loop_sequence:
                missing_loops.append(
                    (first_res_missing_loop, None, missing_loop_sequence))
        if not include_tails:
            missing_loops = [(beg_res, end_res, seq)
                             for (beg_res, end_res, seq) in missing_loops
                             if (beg_res is not None and end_res is not None)]
        if legacy_output:
            return ["%s-%s" % (beg, end) for (beg, end, seq) in missing_loops]
        else:
            return missing_loops

    def _getCTSequences(self, ct):
        """
        Get the residues sequenecs from the structure
        input ct{Schrodinger.Structure} input structure
        Output is a tuple of lists.  The first list is the chainID
        for each of the chains in the molecule as a list of 1-character
        strings.  The second is a list of lists with with the 3-letter code for
        each residue in the sequence of each chain.  All chains are seperated
        wherever a chain break occurs even if both sets of resideus are on
        the same chain
        """
        # Read in all of the sequence fragments from the structure
        ct_sequences = []
        one_chain = None
        one_sequence = []
        last_res = None
        for chain in ct.chain:
            for res in get_residues_by_connectivity(chain):
                # Deal with disconnected density
                if (res.getAlphaCarbon() is None and
                        res.getCarbonylCarbon() is None and
                        res.getBackboneNitrogen() is None):
                    continue
                if (one_chain is None or last_res is None or
                        one_chain != res.chain or
                        not self._residuesAreConnected(last_res, res)):
                    if one_sequence is not None and one_chain is not None:
                        ct_sequences.append(one_sequence)
                        one_chain = None
                        one_sequence = []
                one_chain = res.chain
                one_sequence.append(res)
                last_res = res
        if one_sequence is not None and one_chain is not None:
            ct_sequences.append(one_sequence)
        # Sort the chains by the chain name and residue number
        ct_sequences.sort(key=lambda x: (x[0].chain, x[0].resnum, x[0].inscode))
        # Pull the chain for each sequence
        ct_chains = [one_sequence[0].chain for one_sequence in ct_sequences]
        return ct_chains, ct_sequences

    def _residuesAreConnected(self, res1, res2):
        """
        Return True if there are any bonds connecting res1 and res2, False otherwise

        inputs
            res1{Schrodinger.Structure._Resdiue} Input residue 1
            res2{Schrodinger.Structure._Resdiue} Input residue 2
        """
        if res1.structure != res2.structure:
            return False
        res2_atoms = set(res2.getAtomIndices())
        res1_bonded_atoms = set()
        for atom in res1.atom:
            for bonded_atom in atom.bonded_atoms:
                if int(bonded_atom) in res2_atoms:
                    return True
        return False


[docs]def get_low_energy_reps(sts, eps, key=None):
    """
    Cluster the given structures by energy using the given energy
    key function and precision and return the lowest energy structures
    from each cluster sorted by energy.

    :type sts: list[`schrodinger.structure.Structure`]
    :param sts: the structures

    :type eps: float
    :param eps: the precision that controls the size of the clusters,
        see sklearn.cluster.DBSCAN documentation for more details

    :type key: function or None
    :param key: the function to get the energy from the structure, if None
        minimize.compute_energy will be used

    :raise ValueError: if there is an issue

    :rtype: list[`schrodinger.structure.Structure`]
    :return: representative structures sorted by increasing energy
    """

    if not key:
        key = lambda x: minimize.compute_energy(x)

    # in case the energy function is expensive cache it
    tmp_energy = 'r_user_tmp_energy'
    for st in sts:
        if tmp_energy in st.property:
            # just in case if for some reason the caller has 'r_user_tmp_energy'
            # defined for a different purpose
            raise ValueError(f'The {tmp_energy} is already defined.')
        st.property[tmp_energy] = key(st)
    tmp_key = lambda x: x.property[tmp_energy]

    groups = mathutils.dbscan_cluster(sts, eps, key=tmp_key)
    sts = [min(group, key=tmp_key) for group in groups.values()]
    sts = sorted(sts, key=tmp_key)
    for st in sts:
        st.property.pop(tmp_energy)
    return sts