Source code for schrodinger.application.matsci.clusterstruct

"""
Functions to aid in pulling dimers or clusters of nearest neighbors from a
larger structure. Also functions for replacing atoms in neighboring molecules
with partial charges for Jaguar calculations.

Copyright Schrodinger, LLC. All rights reserved.
"""

import math
from collections import defaultdict
from collections import namedtuple

import numpy

from schrodinger import structure
from schrodinger.application.desmond import cms as desmond_cms
from schrodinger.application.jaguar import input as jaginput
from schrodinger.application.matsci import coarsegrain
from schrodinger.application.matsci import jaguarworkflows
from schrodinger.application.matsci import jobutils
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci import rdpattern
from schrodinger.application.matsci import textlogger
from schrodinger.infra import structure as infrastructure
from schrodinger.job import jobcontrol
from schrodinger.structutils import analyze
from schrodinger.structutils import transform

Neighbor = namedtuple('Neighbor', ['home_atom', 'neighbor_atom', 'dsq'])
NEIGHBOR_ATOM_PROP = 'b_matsci_cluster_neighbor'
APPROACH_PROP = 'r_matsci_closest_approach_(Angstrom)'
ESP_CHARGE_PROP = 'r_matsci_ESP_Charges'
DEFAULT_CHARGE_PROP = 'r_m_charge1'
BASIS_FLAG = '-basis'
USE_Q_FLAG = '-use_charges'
ESP_ARGPARSE_VALUE = 'esp'

# Species-related
NO_MOLS = 'no'
ONE_ONLY_MOL = 'one and only one'
AT_LEAST_ONE_MOL = 'at least one'
TWO_MOLS = 'two'
CRITERIA = [NO_MOLS, ONE_ONLY_MOL, AT_LEAST_ONE_MOL, TWO_MOLS]


[docs]class SpeciesData(object):
    """
    Tracks information about each chemically distinct molecule in a system
    """

    TYPE_LABEL = 'Mols'

[docs]    def __init__(self, smiles=None, group_name=None):
        """
        Create a SpeciesData instance

        :type smiles: str or None
        :param smiles: The unique smiles string for this species

        :type group_name: str or None
        :param group_name: The user-facing name of the species
        """

        self.smiles = smiles
        self.group_name = group_name
        self.members = set()
        self.charges = []
        self.charge_jobname = ""

        # For backwards compatibility. Use self.group_name instead
        self.formula = group_name

[docs]    def __len__(self):
        """
        :rtype: int
        :return: The number of molecules of this species
        """
        return len(self.members)

[docs]    def addMember(self, stindex, molnumber):
        """
        Add a new member of this species

        :type stindex: int
        :param stindex: The structure index this member comes from. It is up to
            the calling code to generate and make use of this value.

        :type molnumber: int
        :param molnumber: The molecule number in the structure that is a member
            of this species
        """

        self.members.add((stindex, molnumber))

[docs]    def isMember(self, stindex, molnumber):
        """
        Check if a molecule is a member of this species

        :type stindex: int
        :param stindex: The structure index this molecule comes from.

        :type molnumber: int
        :param molnumber: The molecule number that is being checked
        """

        return (stindex, molnumber) in self.members

[docs]    def addCharges(self, struct, prop='r_j_ESP_Charges'):
        """
        Add atomic charge information to this SpeciesData object (does not
        actually add the charges as properties to structures). The element
        and charge for every atom are stored as a tuple in the charges list. The
        element is stored to help ensure that any future structure the charges
        are applied to has the same atom ordering as struct.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure of this species. Used to determine the
            element of each atom and the charge of each atom.

        :type prop: str
        :param prop: The property name to use to obtain charges from each atom
            in struct
        """

        self.charges = []
        for atom in struct.atom:
            self.charges.append((atom.element, atom.property[prop]))

[docs]    def getSampleIndices(self):
        """
        Get the st and mol indices of a sample

        :rtype: tuple
        :return: Tuple of (st_index, mol_index)
        """
        mlist = list(self.members)
        mlist.sort()
        st_index, mol_index = mlist[0]
        return st_index, mol_index

[docs]    def getSampleMolNumber(self):
        """
        Get a sample molecule number for the species. Assumes that all members
        belong to a single structure.

        :return int: A sample molecule index of this species
        """
        st_index, mol_num = self.getSampleIndices()
        return mol_num

[docs]    def getSampleStructure(self, struct_iterator):
        """
        Get a sample structure for this species

        :type struct_iterator: iterable
        :param struct_iterator: An iterable of
            `schrodinger.structure.Structure` objects used to create this
            SpeciesData object

        :rtype: `schrodinger.structure.Structure`
        :return: A structure object representing a molecule of this species
        """
        stindex, molnum = self.getSampleIndices()
        for index, struct in enumerate(struct_iterator):
            if index == stindex:
                break
        indexes = struct.molecule[molnum].getAtomIndices()
        return struct.extract(indexes)

[docs]    def getAllAtomIds(self, struct):
        """
        Get all atom ids belonging to this species in the passed structure.
        Assumes that all members belong to a single structure.

        :param `structure.Structure` struct: The structure to get atom ids in

        :return list(int): List of atom ids
        """
        atom_ids = []
        for _, mol_id in self.members:
            atom_ids.extend(struct.molecule[mol_id].getAtomIndices())
        return atom_ids

[docs]    def getExampleMemberTag(self):
        """
        Get an additional tag that gets an example molecule number for this
        species

        :rtype: str
        :return: The tag for this species
        """
        return get_example_mol_tag(self)


[docs]class AtomGroupData(SpeciesData):
    """
    Tracks information about each group of atoms in the system
    """

    TYPE_LABEL = '#'

[docs]    def addMember(self, stindex, atom_ids):
        """
        Add a new member of this species

        :type stindex: int
        :param stindex: The structure index this member comes from.

        :type atom_ids: list
        :param atom_ids: The list of atom indices in the structure that are
            member of this species
        """

        self.members.add((stindex, tuple(atom_ids)))

[docs]    def isMember(self, stindex, atom_ids):
        """
        Check if passed atom ids are member of the species

        :type stindex: int
        :param stindex: The structure index this member comes from.

        :type atom_ids: list
        :param atom_ids: The list of atom indices to check in the structure
            that are member of this species

        :return: Whether the passed atom ids are member
        :rtype: bool
        """

        search_atoms = set(atom_ids)
        for idx, atom_ids in self.members:
            if stindex != idx:
                continue
            if search_atoms.issubset(set(atom_ids)):
                return True
        return False

[docs]    def getSampleIndices(self):
        """
        Get the st and atom indices of a sample

        :rtype: tuple
        :return: Tuple of (st_index, atom_indices)
        """
        mlist = list(self.members)
        mlist.sort()
        return mlist[0]

[docs]    def getSampleStructure(self, struct_iterator):
        """
        Get a sample structure for this species

        :type struct_iterator: iterable
        :param struct_iterator: An iterable of
            `schrodinger.structure.Structure` objects used to create this
            SpeciesData object

        :rtype: `schrodinger.structure.Structure`
        :return: A structure object representing a molecule of this species
        """
        stindex, indexes = self.getSampleIndices()
        for index, struct in enumerate(struct_iterator):
            if index == stindex:
                break
        return struct.extract(indexes)

[docs]    def getExampleMemberTag(self):
        """
        Get an additional tag that gets an example atom number for this
        species

        :rtype: str
        :return: The tag for this species
        """

        stindex, indexes = self.getSampleIndices()
        example_atom_tag = ' Example Included Atom #: %d' % sorted(indexes)[0]
        return example_atom_tag


[docs]def create_pbc(struct):
    """
    Create an infrastructure PBC

    :type struct: `schrodinger.structure.Structure`
    :param struct: The input structure, should have the Chorus box properties
        set

    :rtype: `schrodinger.infra.structure.PBC`
    :return: The infrastructure PBC created from the Chorus box properties

    :raise ValueError: If the Chorus box properties are not set
    """

    try:
        return infrastructure.PBC(*desmond_cms.get_box(struct))
    except KeyError:
        raise ValueError('The input structure must have the Chorus Box '
                         'properties set.')


[docs]def create_distance_cell(struct, distance, pbc=None):
    """
    Create an infrastructure Distance Cell. If struct has the Chorus box
    properties, the Distance Cell will be PBC-aware.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The input structure, may have the Chorus box properties

    :type distance: float
    :param distance: The cutoff for finding nearest neighbor atoms

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The infrastructure PBC for this structure. If not provided, an
        attempt will be made to create one from the Chorus box properties set on
        the structure - if this attempt fails, a non-PBC DistanceCell will be
        created.

    :rtype: `schrodinger.infra.structure.DistanceCell`, pbc
    :return: An infrastructure Distance Cell that accounts for the PBC, and the
        pbc used to create it. If no pbc was used, pbc=None. If the pbc was
        passed in, the same pbc is passed out unchanged. The distance cell has
        a size property that gives the distance it was created with.
    """

    if not pbc:
        try:
            pbc = create_pbc(struct)
        except ValueError:
            pbc = None
    if pbc:
        dcell = infrastructure.DistanceCell(struct, distance, pbc)
    else:
        dcell = infrastructure.DistanceCell(struct, distance)
    # It is sometimes useful to know what distance the cell was created with.
    # Store this information until SHARED-7773 is implemented.
    dcell.size = distance
    return dcell, pbc


[docs]def get_extracted_atom_index(atom_indexes, index):
    """
    Get the index of the atom that has atom.index = index in a new structure
    that would be created by extracting the atoms in atom_indexes

    :type atom_indexes: list of int
    :param atom_indexes: The atom indexes that will be extracted to form a new
        structure

    :type index: int
    :param index: The atom index of the atom of interest

    :rtype: int or None
    :return: The new index of the atom in the extracted structure, or None if
        index is not found in atom_indexes
    """

    exatoms = atom_indexes[:]
    exatoms.sort()
    try:
        return exatoms.index(index) + 1
    except IndexError:
        return None


[docs]class Dimer(object):
    """
    Class to hold and process a pair of neighboring molecules.
    """

[docs]    def __init__(self, stindex, mol1, mol2, neighbor_info, pbc=None):
        """
        Create a Dimer object

        :type stindex: int
        :param stindex: The index of the structure this dimer comes from

        :type mol1: int
        :param mol1: The molecule number in the structure of the first
            molecule in this dimer

        :type mol2: int
        :param mol2: The molecule number in the structure of the second molecule
            in this dimer

        :type neighbor_info: `Neighbor`
        :param neighbor_info: The Neighbor information for this dimer

        :type pbc: `schrodinger.infra.structure.PBC`
        :param pbc: The PBC object to use for this dimer, if any
        """

        self.stindex = stindex
        self.molnumbers = (mol1, mol2)
        self.meets_species_criterion = True
        self.neighbor_info = neighbor_info
        self.pbc = pbc

[docs]    def evaluateSpeciesCriterion(self, crit_species, crit, species):
        """
        Check whether this dimer meets the species criterion - sets the
        meets_species_criterion property.

        :type crit_species: `SpeciesData` or None
        :param crit_species: The species the criterion applies to. Use None if
            there are no criteria - the dimer will be marked as meeting all criteria

        :type crit: str
        :param crit: The criterion - should be one of the constants NO_MOLS,
            ONE_ONLY_MOL, AT_LEAST_ONE_MOL, TWO_MOLS

        :type species: dict
        :param species: The dictionary containing information for all species.
            Keys are unique SMILES strings for a species, values are `SpeciesData`
            for the species with that SMILES string.
        """

        if crit_species is None:
            self.meets_species_criterion = True
            return

        all_species = []
        for mol in self.molnumbers:
            for data in species.values():
                if data.isMember(self.stindex, mol):
                    all_species.append(data)
                    break
        matches = all_species.count(crit_species)
        if crit == NO_MOLS:
            self.meets_species_criterion = not matches
        elif crit == ONE_ONLY_MOL:
            self.meets_species_criterion = matches == 1
        elif crit == AT_LEAST_ONE_MOL:
            self.meets_species_criterion = matches >= 1
        else:
            self.meets_species_criterion = matches == 2

[docs]    def getPairStructure(self, struct):
        """
        Create the dimer structure

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure containing the dimer molecules

        :rtype: `schrodinger.structure.Structure`
        :return: The structure object for the dimer
        """

        if self.pbc:
            pair = self.getPeriodicPairStructure(struct)
        else:
            pair = self.getNonPeriodicPairStructure(struct)
        pair.property[APPROACH_PROP] = math.sqrt(self.neighbor_info.dsq)
        basename = struct.title.replace(' ', '_') or str(self.stindex)
        pair.title = basename + '_%d_%d' % self.molnumbers
        return pair

[docs]    def getClusterStructure(self,
                            struct,
                            cell=None,
                            distance=None,
                            heavy_only=False,
                            by_com=False):
        """
        Get a structure that is the dimer surrounded by all nearest neighbor
        molecules. The dimer and all molecules are positioned at their nearest
        neighbor images to the original position of the first molecule in the
        dimer.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure containing this Dimer and all neighbor
            molecules

        :type cell: `schrodinger.infra.structure.DistanceCell`
        :param cell: The DistanceCell to use when finding nearest neighbors. If
            not given, will be calculated based on struct and distance. Either cell
            or distance must be given.

        :type distance: float
        :param distance: The distance to use when creating a new distance cell
            if needed. Either cell or distance must be given.

        :type heavy_only: bool
        :param heavy_only: Use only heavy atom distances when finding the
            cluster

        :param bool by_com: If True, use center of masses when determining
            distance

        :rtype: `schrodinger.structure.Structure`
        :return: The structure of the dimer with nearest neighbor molecules. The
            dimer molecules appear first in the cluster.
        """

        # Find the xyz coordinates that puts the dimer molecules in their
        # nearest neighbor images
        dimer_st = self.getPairStructure(struct)
        # Now change the XYZ coordinates in the parent structure to the dimer
        # coordinates
        dimer_indexes = list(range(1, dimer_st.atom_total + 1))
        parent_indexes = []
        for molnum in self.molnumbers:
            parent_indexes += struct.molecule[molnum].getAtomIndices()
        for dindex, pindex in zip(dimer_indexes, parent_indexes):
            struct.atom[pindex].xyz = dimer_st.atom[dindex].xyz

        # Make sure we have a distance cell
        if not cell and not distance:
            raise RuntimeError('Either cell or distance must be provided')
        if not cell:
            cell, pbc = create_distance_cell(struct, distance, pbc=self.pbc)

        indexes = []
        for molnum in self.molnumbers:
            indexes.extend(struct.molecule[molnum].getAtomIndices())
        return create_nearest_neighbor_cluster(struct,
                                               indexes,
                                               cell,
                                               pbc=self.pbc,
                                               heavy_only=heavy_only,
                                               by_com=by_com)

[docs]    def getPeriodicPairStructure(self, struct):
        """
        Create a dimer structure for a pair of molecules that takes into account
        the periodic boundary condition.

        If | represents a cell boundary, and .... represents some long distance,
        imagine two molecules X and Y on opposite sides of the cell.

        Y|X....Y|X

        We need to extract coordinates for Y that represent the Y|X case, not
        the X....Y case.

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure containing the dimer molecules

        :rtype: `schrodinger.structure.Structure`
        :return: The structure object for the dimer
        """

        mol1, mol2 = self.molnumbers
        atoms = struct.molecule[mol1].getAtomIndices()
        nearest_neighbor = {mol2: self.neighbor_info}
        pair_list = create_dimer_structures(struct,
                                            atoms,
                                            self.pbc,
                                            nearest_neighbors=nearest_neighbor)
        return pair_list[0]

[docs]    def getNonPeriodicPairStructure(self, struct):
        """
        Create a dimer structure for a pair of molecules in a non-periodic
        system

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure containing the dimer molecules

        :rtype: `schrodinger.structure.Structure`
        :return: The structure object for the dimer
        """

        return create_non_pbc_cluster(struct, self.molnumbers)


# tracker: COMTracker, struct: COMTracker.struct, cell: DistanceCell
ComInfo = namedtuple('ComInfo', ['tracker', 'struct', 'cell'])


[docs]class COMTracker(object):
    """
    When using the center of mass to measure between molecules, we do the
    following:
    - Create a new structure without any atoms except for a single atom at each
    molecule's center of mass (a "COM structure")
    - Use this COM structure for all measurements/clustering
    - Once the dimers/clusters have been determined using the center of mass
    atoms, use the positions of the center of mass atoms to place the original
    molecules at the new center of mass positions.

    The key to the COM structure is that center of mass atoms have the same
    atom index as the molecule index of the molecule in the atom structure they
    represent, and their position indicates the new position of the of the
    corresponding atomistic molecule.

    This class manages the COM structure and various manipulations
    """

    ORIGINAL_MOLNUM_PROP = 'i_matsci_original_molecule_number'

[docs]    def __init__(self, atom_struct):
        """
        Create a COMTracker object

        :type atom_struct: `schrodinger.structure.Structure`
        :param atom_struct: The atom structure that the COM structure will mimic
        """

        self.atom_struct = atom_struct
        contract_by_molecule(self.atom_struct)
        self.struct = self.createMimic(self.atom_struct)
        self.addCOMAtoms()
        self.struct_orig = self.struct.copy()

[docs]    def addCOMAtoms(self):
        """
        Add an atom to the COM structure at the center of mass of each molecule
        in the atom structure
        """

        for mol in self.atom_struct.molecule:
            com = analyze.center_of_mass(self.atom_struct,
                                         atom_indices=mol.getAtomIndices())
            molat = self.struct.addAtom('C', *com)
            # This is used to refer back to the original molecule in the
            # atomistic structure after the COM atom has been extracted into a
            # smaller cluster structure (and therefore it's index no longer
            # corresponds to the original molecule number).
            molat.property[self.ORIGINAL_MOLNUM_PROP] = mol.number

[docs]    def createMimic(self, struct):
        """
        Create an empty structure object with the same title and properties as
        struct

        :type struct: `schrodinger.structure.Structure`
        :param struct: The structure to create a mimic of

        :rtype: `schrodinger.structure.Structure`
        :return: An empty structure with the same title and properties as the
            input structure
        """

        mimic = structure.create_new_structure()
        mimic.title = struct.title
        mimic.property.update(struct.property)
        return mimic

[docs]    def convertCOMToAtomCluster(self, com_cluster):
        """
        Given a structure with center of mass atoms, create a new structure with
        the original molecules located at their corresponding center of mass

        :type com_cluster: `schrodinger.structure.Structure`
        :param com_cluster: The COM structure to create atomistic structure of

        :rtype: `schrodinger.structure.Structure`
        :return: An atomistic structure with molecules located at the same
            positions as the center of mass atoms in com_cluster
        """

        cluster = self.createMimic(com_cluster)
        nprop = NEIGHBOR_ATOM_PROP
        for molat in com_cluster.atom:
            # Get the structure of the corresponding atomistic molecule
            orig_num = molat.property[self.ORIGINAL_MOLNUM_PROP]
            atst = self.atom_struct.molecule[orig_num].extractStructure()

            # Preserve any cluster neighbor information
            try:
                for atom in atst.atom:
                    atom.property[nprop] = molat.property[nprop]
            except KeyError:
                # Either one atom will have the property or none will
                pass

            # Shift the molecule into place
            original_molat = self.struct_orig.atom[orig_num]
            shift = numpy.array(molat.xyz) - numpy.array(original_molat.xyz)
            atst.setXYZ(atst.getXYZ() + shift)

            cluster.extend(atst)
        return cluster


[docs]def create_center_of_mass_info(struct, distance):
    """
    Create information needed for clustering by center of mass

    :param `structure.Structure` struct: The atomistic structure to use to
        find center of masses

    :param float distance: The distance for the distance cell to create

    :rtype: ComInfo
    :return: Center of mass info - a tracker, structure and distance cell
    """

    tracker = COMTracker(struct)
    cstruct = tracker.struct
    cell, pbc = create_distance_cell(cstruct, distance)
    return ComInfo(tracker=tracker, struct=cstruct, cell=cell)


[docs]def find_atoms_closest_to_atoms(struct,
                                atoms,
                                cell,
                                only_greater=False,
                                heavy_only=False):
    """
    Find all the atoms that are within a certain distance of the provided atoms,
    returning information about those atoms.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure object containing the atoms to search

    :type atoms: list of int or `schrodinger.structure._StructureAtom`
    :param atoms: The list of atoms to find atoms near. Can be atom objects or
        integer indexes. If this list contains hydrogen atoms, they will still
        be used even if heavy_only=True.

    :type cell: `schrodinger.infra.structure.DistanceCell`
    :param cell: The distance cell to use for finding nearby atoms

    :type only_greater: bool
    :param only_greater: Whether to only find nearby atoms in molecules with
        indexes greater than the molecule they are close to

    :type heavy_only: bool
    :param heavy_only: Return only heavy atoms that are close to the given
        atoms. This parameter only affects atoms found, it has no effect on the
        list of atoms provided in the atoms parameter.

    :rtype: dict
    :return: Dictionary with keys that are molecule numbers and values that are
        a list of `Neighbor` objects for each atom in that molecule that is
        nearby one of the atoms in the atoms list. Atoms that are nearby
        multiple atoms in the atoms list will appear multiple times, once for
        each atom it is nearby.
    """

    # Taking the int allows atoms to be atoms or atom indexes
    input_atoms = set([int(x) for x in atoms])
    neighbors_by_mol = {}
    for index in input_atoms:
        iatom = struct.atom[index]
        matches = cell.query_atoms(*iatom.xyz)
        for match in matches:
            matom = struct.atom[match.getIndex()]
            if heavy_only and matom.element == 'H':
                continue
            if matom.index not in input_atoms:
                molnum = matom.molecule_number
                if molnum > iatom.molecule_number or not only_greater:
                    nbor = Neighbor(iatom, matom, match.getDistanceSquared())
                    neighbors_by_mol.setdefault(molnum, []).append(nbor)
    return neighbors_by_mol


[docs]def find_molecules_closest_to_atoms(struct,
                                    atoms,
                                    cell,
                                    only_greater=False,
                                    heavy_only=False,
                                    by_com=False):
    """
    Find all the molecules that are within a certain distance of the provided
    atoms, returning information about those molecules

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure object containing the atoms to search

    :type atoms: list of int or `schrodinger.structure._StructureAtom`
    :param atoms: The list of atoms to find molecules near. Can be atom objects
        or integer indexes. This list of atoms is not affected by the heavy_only
        paramter, it may contain H atoms and those atoms will be used.

    :type cell: `schrodinger.infra.structure.DistanceCell`
    :param cell: The distance cell to use for finding nearby molecules

    :type only_greater: bool
    :param only_greater: Whether to only find nearby molecules with indexes
        greather than the molecule they are close to

    :type heavy_only: bool
    :param heavy_only: Use only heavy atoms when finding close molecules. This
        only affects the found molecules and has no effect on the provided atom
        list in atoms

    :type by_com: bool or `ComInfo`
    :param by_com: If True, create the info needed to find neighbors by
        center of mass instead of atom distance. If `ComInfo`, this is the
        pre-calculated data to use for center of mass distance. If False,
        do not find neighbors by center of mass. heavy_only is ignored if
        by_com is not False.

    :rtype: dict
    :return: Dictionary with keys that are molecule numbers and values are
        `Neighbor` objects for the atom in that molecule that is closest to
        one of the atoms in the atoms list
    """

    if by_com is True:
        # User requested using center of mass information but did not supply it
        by_com = create_center_of_mass_info(struct, cell.size)

    if by_com:
        # If using center of mass as the distance, get the list of molecule
        # indexes that are close to the given molecules. The key here is that
        # each atom in the ComInfo.struct structure corresponds to the center
        # of mass of the molecule in struct with the same index (atom index
        # of ComInfo.struct = molecule number of corresponding struct molecule)
        close_coms = defaultdict(list)
        mols = {struct.atom[x].molecule_number for x in atoms}
        for mol in mols:
            for match in by_com.cell.query_atoms(*by_com.struct.atom[mol].xyz):
                molnum = match.getIndex()
                if (only_greater and molnum > mol) or not only_greater:
                    close_coms[mol].append(molnum)
        if not close_coms:
            # No molecules within range
            return {}

    # Find all close atoms in the atomistic structure
    neighbors_by_mol = find_atoms_closest_to_atoms(struct,
                                                   atoms,
                                                   cell,
                                                   only_greater=only_greater,
                                                   heavy_only=heavy_only)

    if by_com:
        # Filter out those atoms belonging to molecules whose center of mass is
        # too far away
        filtered = defaultdict(list)
        for neighbor_molnum, neighbors in neighbors_by_mol.items():
            for neighbor in neighbors:
                home_molnum = neighbor.home_atom.molecule_number
                if neighbor_molnum in close_coms[home_molnum]:
                    filtered[neighbor_molnum].append(neighbor)
        neighbors_by_mol = filtered
    # Reduce to only the closest atom in each molecule
    nearest_neighbors = {}
    for molnum, matches in neighbors_by_mol.items():
        mindist = 100000.
        for neighbor in matches:
            if neighbor.dsq < mindist:
                nearest_neighbors[molnum] = neighbor
                mindist = neighbor.dsq
    return nearest_neighbors


[docs]def get_dimers_with_molecule(struct,
                             molecule,
                             cell,
                             pbc=None,
                             only_greater=False,
                             struct_index=0,
                             heavy_only=False,
                             by_com=False):
    """
    Get a Dimer object for every molecule within a given distance of the
    provided molecule

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure object containing the molecules to search

    :type molecule: `schrodinger.structure._Molecule`
    :param molecule: The molecule to find dimers of

    :type cell: `schrodinger.infra.structure.DistanceCell`
    :param cell: The distance cell to use for finding nearby molecules

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The PBC object used to create the distance cell

    :type only_greater: bool
    :param only_greater: Whether to only find Dimers with molecules that have
        indexes greather than the provided molecule

    :type struct_index: int
    :param struct_index: An index for the structure that is providing these
        dimers. This is passed to the Dimer class and can be used by calling
        routines to track Dimers from different structures.

    :type heavy_only: bool
    :param heavy_only: Use only heavy atoms when determining dimers

    :type by_com: bool or `ComInfo`
    :param by_com: If True, create the info needed to find neighbors by
        center of mass instead of atom distance. If `ComInfo`, this is the
        pre-calculated data to use for center of mass distance. If False,
        do not find neighbors by center of mass. heavy_only is ignored if
        by_com is not False.

    :rtype: list of `Dimer`
    :return: A `Dimer` object for each dimer that molecule forms
    """

    if heavy_only:
        atoms = [x.index for x in molecule.atom if x.element != 'H']
    else:
        atoms = molecule.getAtomIndices()
    mindex = molecule.number
    nearest_neighbors = find_molecules_closest_to_atoms(
        struct,
        atoms,
        cell,
        only_greater=only_greater,
        heavy_only=heavy_only,
        by_com=by_com)
    return [
        Dimer(struct_index, mindex, x, y, pbc)
        for x, y in nearest_neighbors.items()
    ]


[docs]def get_dimers_in_structure(struct,
                            cell=None,
                            pbc=None,
                            distance=None,
                            struct_index=0,
                            heavy_only=False,
                            by_com=False):
    """
    Get a Dimer object for every dimer in struct

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure object containing the dimers to find

    :type cell: `schrodinger.infra.structure.DistanceCell`
    :param cell: The distance cell to use for finding dimers

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The PBC object used to create the distance cell

    :type distance: float
    :param distance: The distance to use in creating a distance cell if none is
        provided

    :type struct_index: int
    :param struct_index: An index for the structure that is providing these
        dimers. This is passed to the Dimer class and can be used by calling
        routines to track Dimers from different structures.

    :type heavy_only: bool
    :param heavy_only: Use only heavy atoms when determining dimers

    :type by_com: bool or `ComInfo`
    :param by_com: If True, create the info needed to find neighbors by
        center of mass instead of atom distance. If `ComInfo`, this is the
        pre-calculated data to use for center of mass distance. If False,
        do not find neighbors by center of mass. heavy_only is ignored if
        by_com is not False.

    :rtype: list of `Dimer`
    :return: A `Dimer` object for each dimer that molecule forms

    :raise RuntimeError: If neither cell nor distance are provided
    """

    if not cell:
        if not distance:
            raise RuntimeError('Either cell or distance must be provided')
        cell, pbc = create_distance_cell(struct, distance, pbc=pbc)
    if by_com is True:
        if cell:
            size = cell.size
        else:
            size = distance
        by_com = create_center_of_mass_info(struct, size)

    all_dimers = []
    for mol in struct.molecule:
        dimers = get_dimers_with_molecule(struct,
                                          mol,
                                          cell,
                                          pbc=pbc,
                                          only_greater=True,
                                          struct_index=struct_index,
                                          heavy_only=heavy_only,
                                          by_com=by_com)
        all_dimers.extend(dimers)
    return all_dimers


[docs]def move_atoms_next_to_xyz(pbc,
                           home_point,
                           struct,
                           moving_point,
                           moving_indexes=None):
    """
    Move a structure to be at the PBC image that is closest to xyz. The atoms in
    the structure are moved as a single group so that they maintain their
    coordinates relative to each other.

    :note: The difference between the method in this function and the
        pbc.getNearestImages() method for finding atom locations is that this
        function is guaranteed to move all atoms with the same vector.
        getNearestImages() is not and can potentially split groups of atoms into
        different images.

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The PBC object used to find images

    :type home_point: list[float]
    :param home_point: The position to move the atoms next to

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure object to move

    :type moving_point: list[float]
    :param moving_point: The vector by which struct is moved is the vector that
        moves moving_point to its image closest to home_point.

    :type moving_indexes: list of int
    :param moving_indexes: If None, all of struct will be moved. Otherwise, only
        the atom indexes in this list will be moved.
    """

    # This points from home_point to where moving_point should be
    shortest_vec = pbc.getShortestVector(*home_point, *moving_point)
    # This points from where moving_point currently is to home_point
    current_vec = [a - b for a, b in zip(home_point, moving_point)]
    # Adding these two vecs together gets a vector that moves moving_point from
    # where it is to where it must go
    move_vec = numpy.array([a + b for a, b in zip(shortest_vec, current_vec)])
    if transform.get_vector_magnitude(move_vec) < 0.01:
        # moving_point is already at the right location
        return

    # Actually move the atoms
    if moving_indexes:
        for index in moving_indexes:
            matom = struct.atom[index]
            matom.xyz += move_vec
    else:
        struct.setXYZ(struct.getXYZ() + move_vec)


[docs]def contract_by_molecule(struct, pbc=None, anchors=None):
    """
    Contract each molecule in a structure so that all the atoms within a given
    molecule are at the same PBC image. In this case, "same PBC image" means
    that the molecule has chemically correct coordinates even if the PBC is
    removed. Thus, it will turn::

    |--      --|

    into::

    |        --|--

    where | = pbc wall, - = atoms

    If no PBC exists, no action will be taken.

    Algorithm: Starting at an arbitrary atom (or the anchor atom if provided,
    #1, contract (move to nearest
    image) all atoms bound to #1. Keep a list of all atoms contracted. Pick an
    atom that has been contracted and repeat, adding its neighbors to the list
    of contracted atoms. Continue until the list of contracted atoms has been
    exhausted. If all atoms in the structure have been contracted, then work is
    complete. If some atoms in the structure have not been contracted, this
    means that there is more than one molecule. Pick an arbitrary new
    uncontracted atom and return to the top of the Algorithm. Repeat this
    outer loop until all atoms have been contracted. This algorithm ensures that
    atoms are always being moved next to an atom they are bonded to rather than
    an atom that is nearby due to the PBC. It does NOT in any way move molecules
    near each other except by coincidence.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to modify

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The PBC to use in determining images

    :type anchors: list
    :param anchors: List of atom indexes that should not be moved and that all
        other atoms should be contracted towards. Note that the use of anchors
        means that not all atoms will be moved to the same image if more than one
        anchor comes from the same molecule and the anchors are not in the same
        image.

    :raise IndexError: If an anchor index is not an atom index in struct
    """

    if not pbc:
        try:
            pbc = create_pbc(struct)
        except ValueError:
            # If a structure has no PBC, it is by definition already contracted
            return

    if not anchors:
        anchors = []

    # Atoms in this set will be used preferentially to anchor the contraction of
    # a new molecule. Note - this function uses sets of atom indexes instead of
    # _StructureAtom objects because _StructureAtom objects hash in orders that
    # are unpredictable from structure to structure, meaning that two
    # consecutive calls to the function with copies of same structure could
    # yield different results (MATSCI-2983)
    anchor_indexes = set(anchors)
    # Tracking the set of moved_atoms avoids contracting an atom twice (or once
    # if it is an anchor)
    moved_indexes = anchor_indexes.copy()
    # unchecked_atoms is all the atoms we have yet to contract the neighbors of
    unchecked_indexes = set(range(1, struct.atom_total + 1))

    while unchecked_indexes:
        # unchecked_neighbors is all the atoms that have been contracted but not
        # had their neighbors contracted. unchecked_neighbors and
        # unchecked_indexes may appear to have similar uses, but
        # unchecked_neighbors will only ever have atoms from the molecule
        # currently being contracted
        if anchor_indexes:
            anchor_index = anchor_indexes.pop()
            unchecked_neighbors = {anchor_index}
            unchecked_indexes.remove(anchor_index)
        else:
            unchecked_neighbors = {unchecked_indexes.pop()}
        # By looping over unchecked neighbors, we ensure that every atom is
        # being contracted to an atom that a) it is bonded to and b) has already
        # been contracted.
        while unchecked_neighbors:
            index = unchecked_neighbors.pop()
            atom = struct.atom[index]
            unchecked_indexes.discard(index)
            anchor_indexes.discard(index)
            for batom in atom.bonded_atoms:
                if batom.index not in moved_indexes:
                    # We haven't moved this atom yet, move it and mark it
                    batom.xyz = pbc.getNearestImage(struct, atom, batom)
                    moved_indexes.add(batom.index)
                if batom.index in unchecked_indexes:
                    # If we haven't contracted this atom's neighbors yet, add it
                    # to the list to check. It might have already been checked
                    # if we are going around a ring.
                    unchecked_neighbors.add(batom.index)


[docs]def contract_structure(struct,
                       pbc=None,
                       cell=None,
                       distance=10.,
                       contract_on_atoms=None,
                       also_keep_stationary=False):
    """
    Contract a structure so that the entire structure has chemically sensible
    coordinates even if the PBC is removed - i.e. it uses only a single PBC
    image and would be suitable for a Jaguar calculation for instance.  This
    involves two steps: 1) contracting each molecule so that the molecule itself
    is in a single image and then 2) contracting all molecules together so that
    they are all in the same image. In step 2 molecules close to a core set of
    atoms will be moved in such a way that preserves their closest possible
    approach to the core atoms. For computational speed optimization, molecules
    outside the specified cell/distance will be moved so that an arbitrary atom
    in the molecule is as close as possible to an arbitrary atom in the core. To
    avoid this, pass in a cell or distance that uses a large-enough distance
    that all molecules will fall inside that distance.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to modify

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The PBC to use in determining images

    :type cell: `schrodinger.infra.structure.DistanceCell`
    :param cell: The distance cell to use for finding closest approaches

    :type distance: float
    :param distance: The distance to use for finding closest approaches if cell
        is not provided

    :type contract_on_atoms: list of int
    :param contract_on_atoms: List of atom indexes that form the core that all
        other molecules will be contracted towards in step 2. If not provided, the
        first molecule will be used. Note that these molecules might still move as
        part of the Step 1 process.

    :type also_keep_stationary: bool
    :param also_keep_stationary: If contract_on_atoms is provided, setting this
        flag to True will ensure that these atoms do not move during the Step 1
        contraction by molecule.
    """

    if not pbc:
        pbc = create_pbc(struct)
    # Note - creating the distance cell before doing the molecular contraction
    # keeps results consistent whether the cell is passed in or created in this
    # function. When creating the cell after contraction, I saw very slight
    # numerical differences between the two methods - mostly roundoff, but
    # enough to change results.
    if not cell:
        distance = min(distance, 0.9 * min(pbc.getBoxLengths()))
        cell, pbc = create_distance_cell(struct, distance, pbc=pbc)

    # First make sure all molecules are contracted intramolecularly
    if contract_on_atoms and also_keep_stationary:
        anchors = contract_on_atoms
    else:
        anchors = None
    contract_by_molecule(struct, pbc, anchors=anchors)

    if struct.mol_total == 1:
        return

    # Next move each molecule to be at its closest approach to the specified
    # group of atoms (or the first molecule if no atoms specified)
    if not contract_on_atoms:
        contract_on_atoms = struct.molecule[1].getAtomIndices()
    # Move nearby molecules to their closest approach
    nearest_neighbors = find_molecules_closest_to_atoms(struct,
                                                        contract_on_atoms, cell)
    for molnum, neighbor_info in nearest_neighbors.items():
        indexes = struct.molecule[molnum].getAtomIndices()
        move_atoms_next_to_xyz(pbc,
                               neighbor_info.home_atom.xyz,
                               struct,
                               neighbor_info.neighbor_atom.xyz,
                               moving_indexes=indexes)
    # Move far away molecules close
    cmols = [struct.atom[x].molecule_number for x in contract_on_atoms]
    contracted_mols = set(cmols + list(nearest_neighbors))
    catom = struct.atom[contract_on_atoms[0]]
    for mol in struct.molecule:
        if mol.number not in contracted_mols:
            indexes = mol.getAtomIndices()
            atom1 = struct.atom[indexes[0]]
            move_atoms_next_to_xyz(pbc,
                                   catom.xyz,
                                   struct,
                                   atom1.xyz,
                                   moving_indexes=indexes)


[docs]def contract_structure2(struct,
                        pbc=None,
                        cell=None,
                        distance=10.,
                        contract_on_atoms=None,
                        also_keep_stationary=False,
                        mol_nums=None):
    """
    Contract a structure so that the entire structure has chemically sensible
    coordinates even if the PBC is removed - i.e. it uses only a single PBC
    image and would be suitable for a Jaguar calculation for instance.

    This is done by:
    1) Contracting each molecule so that the molecule itself is in a single
    image. 2) Finding atoms close to "contract_on_atoms" and moving them close,
    then finding atoms that are close to the newly moved atoms and moving them
    close, and so on until no new atoms are found. 3) If any molecules remain,
    they are moved to be close to the centroid of the contracted molecules in
    step 2.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to modify

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The PBC to use in determining images

    :type cell: `schrodinger.infra.structure.DistanceCell`
    :param cell: The distance cell to use for finding closest approaches

    :type distance: float
    :param distance: The distance to use for finding closest approaches if cell
        is not provided

    :type contract_on_atoms: list of int
    :param contract_on_atoms: List of atom indexes that form the core that all
        other molecules will be contracted towards in step 2. If not provided, the
        first molecule will be used. Note that these molecules might still move as
        part of the Step 1 process.

    :type also_keep_stationary: bool
    :param also_keep_stationary: If contract_on_atoms is provided, setting this
        flag to True will ensure that these atoms do not move during the Step 1
        contraction by molecule.

    :type mol_nums: iterable or None
    :param mol_nums: If supplied, only move these molecules in steps 2 and 3.
        NOTE: contract_by_molecule is unaffected by this.
    """

    if not pbc:
        pbc = create_pbc(struct)
    # Note - creating the distance cell before doing the molecular contraction
    # keeps results consistent whether the cell is passed in or created in this
    # function. When creating the cell after contraction, I saw very slight
    # numerical differences between the two methods - mostly roundoff, but
    # enough to change results.
    if not cell:
        distance = min(distance, 0.45 * min(pbc.getBoxLengths()))
        cell, pbc = create_distance_cell(struct, distance, pbc=pbc)

    # First make sure all molecules are contracted intramolecularly
    if contract_on_atoms and also_keep_stationary:
        anchors = contract_on_atoms
    else:
        anchors = None
    contract_by_molecule(struct, pbc, anchors=anchors)

    if struct.mol_total == 1:
        return

    # Next move each molecule to be at their closest approach to the specified
    # group of atoms (or the first molecule if no atoms specified)
    if not contract_on_atoms:
        mol_num = sorted(mol_nums)[0] if mol_nums is not None else 1
        contract_on_atoms = struct.molecule[mol_num].getAtomIndices()

    # Move nearby molecules to their closest approach
    new_atoms_ids = set(contract_on_atoms)
    contracted_atom_ids = new_atoms_ids.copy()
    skip_mols = {struct.atom[x].molecule_number for x in contract_on_atoms}
    if mol_nums:
        skip_mols.update(
            [x for x in range(1, struct.mol_total + 1) if x not in mol_nums])
    while new_atoms_ids:
        nearest_neighbors = find_molecules_closest_to_atoms(
            struct, new_atoms_ids, cell)

        new_atoms_ids.clear()
        for molnum, neighbor_info in nearest_neighbors.items():
            if molnum in skip_mols:
                continue
            skip_mols.add(molnum)
            indexes = struct.molecule[molnum].getAtomIndices()
            contracted_atom_ids.update(indexes)
            new_atoms_ids.update(indexes)
            move_atoms_next_to_xyz(pbc,
                                   neighbor_info.home_atom.xyz,
                                   struct,
                                   neighbor_info.neighbor_atom.xyz,
                                   moving_indexes=indexes)

    # Move far away molecules close to the centroid of contracted molecules
    xyz_indexes = numpy.array(list(contracted_atom_ids)) - 1
    centeroid = numpy.average(struct.getXYZ()[xyz_indexes], axis=0)

    for mol in struct.molecule:
        if mol.number not in skip_mols:
            indexes = mol.getAtomIndices()
            atom1 = struct.atom[indexes[0]]
            move_atoms_next_to_xyz(pbc,
                                   centeroid,
                                   struct,
                                   atom1.xyz,
                                   moving_indexes=indexes)


[docs]def create_dimer_structures(struct,
                            atoms,
                            pbc=None,
                            nearest_neighbors=None,
                            cell=None,
                            distance=None):
    """
    Create structures for each dimer that the atoms in the atoms list form

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure containing the dimers

    :type atoms: list of int
    :param atoms: The atom indexes to find dimers for

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The PBC to use in determining images

    :type nearest_neighbors: dict
    :param nearest_neighbors: Dictionary with keys that are molecule numbers and
        values are `Neighbor` objects for the atom in that molecule that is closest
        to one of the atoms in the atoms list. Such a dictionary is provided by
        find_molecules_closest_to_atoms.

    :type cell: `schrodinger.infra.structure.DistanceCell`
    :param cell: The distance cell to use for finding dimers. Either distance or
        cell must be provided if nearest_neighbors is not provided.

    :type distance: float
    :param distance: The intermolecular distance to use for finding dimers if
        cell is not provided. Either distance or cell must be provided if
        nearest_neighbors is not provided.

    :rtype: list
    :return: A list of `schrodinger.structure.Structure` objects, one for each
        dimer found

    :raise RuntimeError: If none of nearest_neighbors, cell nor distance are
        provided
    """

    # Find the nearest neighbors if not provided
    if nearest_neighbors is None:
        if cell is None:
            if distance is None:
                raise RuntimeError('At least one of nearest_neighbors, cell or '
                                   'distance must not be None')
            cell = create_distance_cell(struct, distance, pbc)
        nearest_neighbors = find_molecules_closest_to_atoms(struct, atoms, cell)

    center = struct.extract(atoms)
    if pbc:
        # Ensure that center is not broken up into pieces by the PBC
        contract_structure(center, pbc)
    dimers = []

    # For each neighboring molecule, extract it, move it to the PBC-created
    # image that generated the nearest neighboring atom, then merge it into the
    # dimer
    for molnum, neighbor in nearest_neighbors.items():
        # Create the neighbor structure
        natoms = struct.molecule[molnum].getAtomIndices()
        nst = struct.extract(natoms)
        # Ensure that the neighbor molecule is not broken into pieces by the pbc
        if pbc:
            contract_by_molecule(nst, pbc)
            # Next, translate the entire neighbor structure so that its point of
            # closest approach to the home structure uses the image that is
            # closest to the home structure
            home_index = get_extracted_atom_index(atoms,
                                                  neighbor.home_atom.index)
            home_atom = center.atom[home_index]
            neighbor_index = get_extracted_atom_index(
                natoms, neighbor.neighbor_atom.index)
            neighbor_atom = nst.atom[neighbor_index]
            move_atoms_next_to_xyz(pbc, home_atom.xyz, nst, neighbor_atom.xyz)

        dimer = center.copy()
        dimer.extend(nst)
        dimers.append(dimer)
    return dimers


[docs]def create_nearest_neighbor_cluster(struct,
                                    atoms,
                                    cell,
                                    pbc=None,
                                    heavy_only=False,
                                    by_com=False):
    """
    Return a structure that has the given atoms surrounded by their
    nearest neighbor molecules

    :type struct: `schrodinger.structure.Structure`
    :param struct: The input structure

    :type atoms: list of int
    :param atoms: The atom indexes that will form the center of the cluster

    :type cell: `schrodinger.infra.structure.DistanceCell`
    :param cell: The DistanceCell to use when finding nearest neighbors

    :type pbc: `schrodinger.infra.structure.PBC`
    :param pbc: The PBC to use when finding nearest images if one should be used

    :type heavy_only: bool
    :param heavy_only: Use only heavy atom distances when finding the cluster

    :type by_com: bool or `ComInfo`
    :param by_com: If True, create the info needed to find neighbors by
        center of mass instead of atom distance. If `ComInfo`, this is the
        pre-calculated data to use for center of mass distance. If False,
        do not find neighbors by center of mass. heavy_only is ignored if
        by_com is not False.

    :rtype: `schrodinger.structure.Structure`
    :return: A cluster that has the given atoms at the center and all nearest
        neighbors surrounding them. The given atoms appear first in the structure.
    """

    # Add the given atoms as the first atoms in the new cluster
    cluster = struct.extract(atoms)
    for atom in cluster.atom:
        atom.property[NEIGHBOR_ATOM_PROP] = False

    # Add all the nearest neighbors first
    nearest_neighbors = find_molecules_closest_to_atoms(struct,
                                                        atoms,
                                                        cell,
                                                        heavy_only=heavy_only,
                                                        by_com=by_com)
    maxdsq = 0.0
    for molindex, neighbor_info in nearest_neighbors.items():
        nstruct = struct.molecule[molindex].extractStructure()
        for atom in nstruct.atom:
            atom.property[NEIGHBOR_ATOM_PROP] = True
        cluster.extend(nstruct)
        maxdsq = max(maxdsq, neighbor_info.dsq)

    # Contract the cluster to nearest neighbor images
    if pbc:
        distance = math.sqrt(maxdsq)
        # Add just a bit to the max distance to ensure all neighbors are found
        # by the PBC distance cell even if numerical roundoff occurs
        distance = distance + 0.01
        central_atoms = list(range(1, len(atoms)))
        contract_structure(cluster,
                           pbc,
                           distance=distance,
                           contract_on_atoms=central_atoms)

    return cluster


[docs]def create_non_pbc_cluster(struct, molnumbers):
    """
    Create a cluster of the given molecules

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to extract molecules from

    :type molnumbers: list of int
    :param molnumbers: The molecule numbers of the molecules to extract

    :rtype: `schrodinger.structure.Structure`
    :return: A cluster of molecules
    """
    asl = ' or '.join(['mol.n %d' % x for x in molnumbers])
    atoms = analyze.evaluate_asl(struct, asl)
    cluster = struct.extract(atoms)
    return cluster


[docs]def get_all_atoms_marked_as_neighbors(struct):
    """
    Return a list of all atom objects in struct that are marked as neighbors

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to extract atoms from

    :rtype: list
    :return: A list of all atom objects marked with the NEIGHBOR_ATOM_PROP
        property
    """

    qatoms = []
    for atom in struct.atom:
        if atom.property.get(NEIGHBOR_ATOM_PROP):
            qatoms.append(atom)
    return qatoms


[docs]def add_neighbor_charges_to_jaginput(struct, qprop, jfile):
    """
    Add point charges that represent neighboring atoms to an existing jaguar
    input file. A point charge will be written for every atom that has the
    NEIGHBOR_ATOM_PROP set to True. The charges are added in a &pointch section.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to extract charges from

    :type qprop: str
    :param qprop: The atom property to use as for the charge values. If qprop is
        `ESP_ARGPARSE_VALUE`, `ESP_CHARGE_PROP` will be used instead. This
        allows restart jobs to use previously-computed charges without
        recomputing them.

    :type jfile: str
    :param jfile: The path to the Jaguar input file
    """

    if qprop == ESP_ARGPARSE_VALUE:
        qprop = ESP_CHARGE_PROP

    qatoms = get_all_atoms_marked_as_neighbors(struct)
    if qatoms:
        with open(jfile, 'a') as infile:
            infile.write('\n&pointch\n')
            for atom in qatoms:
                # Add a line: charge X Y Z
                values = tuple([atom.property.get(qprop, 0.0)] + atom.xyz)
                infile.write('%.6f %.5f %.5f %.5f\n' % values)
            infile.write('&\n')


[docs]def set_neighbor_atoms_basis_set(struct, basis, jfile):
    """
    Set all atoms marked as neighbors with the NEIGHBOR_ATOM_PROP property
    to use the provided basis set.  The basis set is specified in an &atomic
    section.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure to extract charges from

    :type basis: str
    :param basis: The basis set to use for neighbor atoms

    :type jfile: str
    :param jfile: The path to the Jaguar input file
    """

    batoms = get_all_atoms_marked_as_neighbors(struct)
    if batoms:
        with open(jfile, 'a') as infile:
            infile.write('\n&ATOMIC\natom basis\n')
            for atom in batoms:
                # Add a line: atom_index basis_set
                infile.write('%d\t%s\n' % (atom.index, basis))
            infile.write('&\n')


[docs]def find_mol_species_from_rdmol(patterns):
    """
    Finds unique molecule species.

    :param patterns: The list of pattern to calculate species from.
    :type patterns: list(schrodinger.application.matsci.rdpattern.Pattern)

    :returns: dict
    :rtype: dictionary where the key is the unique smiles for each species
        and value is the species data
    """

    species = {}
    for st_idx, pattern in enumerate(patterns):
        for mol_idx, smiles in pattern.getMoleculeSmiles().items():
            molecule = pattern.struct.molecule[mol_idx]
            mol_struct = molecule.extractStructure()

            if pattern.is_coarse_grain:
                chem_formula = coarsegrain.get_atom_name_formula(mol_struct)
            else:
                chem_formula = analyze.generate_molecular_formula(mol_struct)

            if smiles not in species:
                species[smiles] = AtomGroupData(smiles, chem_formula)

            atom_indices = molecule.getAtomIndices()
            species[smiles].addMember(st_idx, atom_indices)

    return species


[docs]def find_res_species(patterns):
    """
    Finds unique residue species.

    :param patterns: The list of pattern to calculate species from.
    :type patterns: list(schrodinger.application.matsci.rdpattern.Pattern)

    :returns: dict
    :rtype: dictionary where the key is the unique smarts for each species
        and value is the species data
    """

    species = {}
    for st_idx, pattern in enumerate(patterns):
        # Loop through disconnected residues
        for residue in pattern.struct.residue:
            res_aids = residue.getAtomIndices()
            res_st = residue.extractStructure()

            if pattern.is_coarse_grain:
                chem_formula = coarsegrain.get_atom_name_formula(res_st)
            else:
                chem_formula = analyze.generate_molecular_formula(res_st)

            smiles = pattern.toSmiles(atom_ids=sorted(res_aids))
            key = f'{residue.pdbres} ({chem_formula})'
            if key not in species:
                species[key] = AtomGroupData(smiles, key)
            species[key].addMember(st_idx, res_aids)
    return species


[docs]def find_species(structs, naming_function=None):
    """
    Find all chemically distinct species in the given structures, as determined
    via unique SMILES strings or unique names.

    If naming_function is supplied, species are grouped using names, unless the
    function returns None, in which case the code falls back to using SMILES.

    :type structs: list of `schrodinger.structure.Structure`
    :param structs: The structures to find species in

    :type naming_function: callable or None
    :param naming_function: The function to call to get a name for each
        molecule. Should take a `structure._Molecule` object as argument and
        return a string

    :rtype: dict
    :return: Keys are unique SMILES strings or group names for species, values
        are SpeciesData objects for that species
    """

    # We need to find all species in the chosen structures
    species = {}
    large_species = {}
    for stindex, struct in enumerate(structs):
        for mol in struct.molecule:
            group_name = smiles = None
            if naming_function:
                group_name = naming_function(mol)
                key = group_name
            if not group_name:
                molstruct = mol.extractStructure()
                group_name = analyze.generate_molecular_formula(molstruct)
                # 20000 is a hardcoded limit on the number of atoms for a SMILES
                # pattern (MATSCI-12295). This can be removed when we switch
                # to rdkit
                heavy_atom_limit = 20000
                # For a quick estimate, assume all atoms are heavy
                num_heavy_atoms = molstruct.atom_total
                if num_heavy_atoms >= heavy_atom_limit:
                    # There are enough atoms so we need to count heavy atoms
                    num_heavy_atoms = len(
                        analyze.evaluate_asl(molstruct, 'heavy_atoms'))
                if num_heavy_atoms < heavy_atom_limit:
                    smiles = msutils.generate_smiles(molstruct)
                else:
                    # This structure is too large to create a SMILES pattern
                    # for. We'll lump all large structures with the same
                    # molecular formula into the same species.
                    new_fake_smiles = f'large{len(large_species)}'
                    smiles = large_species.setdefault(group_name,
                                                      new_fake_smiles)
                key = smiles
            if key not in species:
                # Found a new species - create a data structure for it
                species[key] = SpeciesData(smiles=smiles, group_name=group_name)
            species[key].addMember(stindex, mol.number)

    return species


[docs]def get_species_display_names(species):
    """
    Create a dictionary mapping the display name of species to the species

    :param iterable species: The species to get display names for

    :rtype: dict
    :return: dictionary containing species display names and species objects
    """
    itemdict = {}
    for data in species:
        key = '%s %s:%d' % (data.group_name, data.TYPE_LABEL, len(data.members))
        if key in itemdict:
            # Two species with the same formula and same number of members.
            # Uniquify the keys by adding an example molecule number
            # First uniquify the existing key and change the dict entry
            existing_value = itemdict.pop(key)
            existing_key = key + existing_value.getExampleMemberTag()
            itemdict[existing_key] = existing_value
            # Now uniquify the new key
            key = key + data.getExampleMemberTag()

        itemdict[key] = data

    return itemdict


[docs]def get_species_from_mol(all_species, mol_id):
    """
    Get the species that mol_id belongs to. Assumes that all members belong to a
    single structure.

    :param iterable all_species: All species to look in

    :param int mol_id: The mol id to get the species for

    :raise RuntimeError: If the mol_id doesn't belong to any of the passed
        species

    :rtype: `SpeciesData`
    :return: The species that contains mol_id
    """
    for species in all_species:
        if (0, mol_id) in species.members:
            return species
    else:
        raise RuntimeError(
            f'Molecule {mol_id} does not belong to any of the species.')


[docs]def get_example_mol_tag(species_data):
    """
    Get an additional tag that gets an example molecule number for this
    species

    :type species_data: `SpeciesData`
    :param species_data: The data for this species

    :rtype: str
    :return: The tag for this species
    """

    molnums = [x[1] for x in species_data.members]
    molnums.sort()
    return ' Example Mol #: %d' % molnums[0]


[docs]def get_base_esp_jobname():
    """
    Get the base name that will be used for esp jobs

    :rtype: str
    :return: The base name for esp jobs
    """

    backend = jobcontrol.get_backend()
    if backend:
        name = backend.getJob().Name + '_'
    else:
        name = ""
    name += 'esp_'
    return name


[docs]def get_species_masses(struct, **kwargs):
    """
    Calculate the mass of each species in a Desmond system.

    :param schrodinger.structure.Structure struct: Structure to get masses of

    :param kwargs: Keyword arguments to be use when converting the structure
        into an RDKit Pattern for analysis.

    :rtype: dict(str=float)
    :return: dictionary whose keys are the species's chemical formulae and
        whose values are the total mass of that species (in Daltons)
    """
    species_masses = {}

    pattern = rdpattern.Pattern(struct, **kwargs)
    species_dict = find_mol_species_from_rdmol([pattern])
    for species in species_dict.values():

        # If we assume that all molecules within one species are identical,
        # then we only need to find the mass of one molecule to find the total
        # mass of all of them.
        _, atom_indices = next(iter(species.members))
        mol_mass = sum(struct.atom[idx].atomic_weight for idx in atom_indices)
        mass = mol_mass * len(species.members)

        species_masses[species.formula] = mass
    return species_masses


[docs]def compute_esp_charges(structs,
                        basis,
                        species=None,
                        tpp=None,
                        logger=None,
                        quiet=False,
                        opt=False):
    """
    Compute ESP charges for a representative example of each species found in
    structs.

    :type structs: list of `schrodinger.structure.Structure`
    :param structs: The structures to find species in to compute the charges of

    :type basis: str or None
    :param basis: The basis set to use when computing charges. None will use the
        default Jaguar basis set.

    :type species: dict
    :param species: Keys are unique SMILES strings, values are SpeciesData
        objects for the species with that SMILES string. If not provided, will be
        computed.

    :type tpp: int
    :param tpp: The number of threads per Jaguar job. If not provided, -TPP will
        not be included in the Jaguar command - the job will run serially.

    :type logger: `logging.Logger`
    :param logger: The logger to use, if any. If not provided and quiet=False,
        messages will be printed to the terminal

    :type quiet: bool
    :param quiet: If True, do not print or log any messages.

    :type opt: bool
    :param opt: Whether to perform a geometry optimization before computing
        charges

    :rtype: dict, list
    :return: Dict is the species - either those provided or those computed. List
        is the names of jobs that failed.
    """

    if not species:
        species = find_species(structs)
    if not quiet:
        textlogger.log(logger,
                       'Found %d species to compute charges for' % len(species))

    # Initial setup for all jobs
    jobdj = jobutils.create_queue(host=jobutils.AUTOHOST)
    keys = {'basis': basis, 'dftname': 'b3lyp', 'icfit': 1}
    keys['igeopt'] = 1 if opt else 0
    name = get_base_esp_jobname() + '%d_%d'
    base_cmd = ['jaguar', 'run']
    if tpp:
        base_cmd += ['-TPP', str(tpp)]
        if int(tpp) > 1:
            jobdj.disableSmartDistribution()
    species_jobs = {}

    # Fill the queue with jobs
    for group in species.values():

        # Create the Jaguar input file
        for mdata in group.members:
            # Grab an arbitrary member
            break
        stindex, molnum = mdata
        struct = structs[stindex].molecule[molnum].extractStructure()
        jobname = name % (stindex, molnum)
        jagin = jaginput.JaguarInput(structure=struct,
                                     genkeys=keys,
                                     name=jobname)
        jagin.save()

        # Create the job and add it to the queue
        group.esp_jobname = jobname
        cmd = base_cmd + [jagin.filename]
        job = jobutils.RobustSubmissionJob(cmd)
        jobdj.addJob(job)
        jaguarworkflows.add_jaguar_files_to_jc_backend(jobname)
        species_jobs[jobname] = group
        if not quiet:
            textlogger.log(
                logger,
                'Created input for species with formula %s' % group.formula)

    # Run the jobs
    if not quiet:
        textlogger.log(logger, 'Running charge jobs')
    jobdj.run()

    failed = []
    for qjob in jobdj.all_jobs:
        job = qjob.getJob()
        if job.succeeded():
            # Extract the charges and store then on the SpeciesData object
            outmae = job.Name + '.01.mae'
            try:
                outst = structure.Structure.read(outmae)
            except IOError:
                failed.append((job.Name, species_jobs[job.Name].formula))
                continue
            species_jobs[job.Name].addCharges(outst)
        else:
            failed.append(job.Name)

    # Message about failed jobs
    if failed and not quiet:
        failed_names = ', '.join(failed)
        textlogger.log(logger, 'Failed jobs: %s' % failed_names)

    return species, failed


[docs]def populate_with_charges(structs, species, prop=ESP_CHARGE_PROP):
    """
    For every molecule in structs, add an atom property that contains the
    previously computed charges for the species it belongs to.

    This method expects the list of structs to be the same as the list that
    species was computed from.

    :type structs: list of `schrodinger.structure.Structure`
    :param structs: The structures to find species in to compute the charges of

    :type species: dict
    :param species: Keys are unique SMILES strings, values are SpeciesData
        objects for the species with that SMILES string.

    :type prop: str
    :param prop: The atom property to set with the new charge

    :raise RuntimeError: If things don't match up, such as structs being shorter
        than expected, there being a different number of molecules in a given
        structure, the atoms not being in expected order in a molecule, or a
        molecule having a different number of atoms than expected.
    """

    # Assign charges for all species
    for group in species.values():
        for stindex, molnum in group.members:
            try:
                struct = structs[stindex]
            except IndexError:
                raise RuntimeError('There are fewer structures, %d, than '
                                   'expected, tried to find molecule %d in '
                                   'structure %d.' %
                                   (len(structs), molnum, stindex))
            try:
                mol = struct.molecule[molnum]
            except KeyError:
                raise RuntimeError('There are fewer molecules, %d, than '
                                   'expected in structure %d, tried to find '
                                   'molecule %d.' %
                                   (struct.mol_total, stindex, molnum))
            numat = len(mol.getAtomIndices())
            numq = len(group.charges)
            if numat != numq:
                raise RuntimeError('Expected %d atoms in molecule %d of '
                                   'structure %d, instead there are %d.' %
                                   (numq, molnum, stindex, numat))
            for (elem, charge), index in zip(group.charges,
                                             mol.getAtomIndices()):
                atom = struct.atom[index]
                if atom.element != elem:
                    raise RuntimeError('Expected atom %d in structure %d to be '
                                       'element %s, but instead it is %s.' %
                                       (index, stindex, elem, atom.element))
                atom.property[prop] = charge

    # Verify that all atoms had charges assigned
    for stindex, struct in enumerate(structs):
        for atom in struct.atom:
            if atom.property.get(prop) is None:
                raise RuntimeError('Atom %d in structure %d did not have a '
                                   'charge assigned to it.' %
                                   (atom.index, stindex))


[docs]def compute_and_populate_charges(structs,
                                 basis,
                                 tpp=None,
                                 logger=None,
                                 quiet=False,
                                 species=None,
                                 opt=False):
    """
    Determine the unique chemical species of each molecule in structs, compute
    the ESP charges with Jaguar for each species and then apply those charges as
    atom properties to each atom in structs.

    :type structs: list of `schrodinger.structure.Structure`
    :param structs: The structures to find species in to compute the charges of

    :type basis: str or None
    :param basis: The basis set to use when computing charges. None will use the
        default Jaguar basis set.

    :type tpp: int
    :param tpp: The number of threads per Jaguar job

    :type logger: `logging.Logger`
    :param logger: The logger to use, if any. If not provided and quiet=False,
        messages will be printed to the terminal

    :type quiet: bool
    :param quiet: If True, do not print or log any messages.

    :type species: dict
    :param species: Keys are unique SMILES strings, values are SpeciesData
        objects for the species with that SMILES string. If not passed in, will be
        determined automatically. Must correspond to the structures in structs.

    :type opt: bool
    :param opt: Whether to perform a geometry optimization before computing
        charges

    :rtype: dict, list, str
    :return: Dict is the species computed. List is the names of ESP jobs that
        failed. str is any error message that occurred during the assignment of
        computed ESP charges to atoms in structs. If quiet=False, the names of
        failed jobs and msg will have already been logged.
    """

    species, failures = compute_esp_charges(structs,
                                            basis,
                                            tpp=tpp,
                                            logger=logger,
                                            quiet=quiet,
                                            species=species,
                                            opt=opt)
    if failures:
        return species, failures, None

    msg = None
    try:
        populate_with_charges(structs, species)
    except RuntimeError as error:
        msg = str(error)
        if not quiet:
            textlogger.log(logger, msg)

    return species, failures, msg


[docs]def get_cluster_atom_ids(distance_cell, xyz):
    """
    Get ids of atoms surrounding xyz coordinate

    :param `infra.structure.DistanceCell` distance_cell: The distance cell

    :param list xyz: The coordinates of the point to get the cluster around

    :rtype: list
    :return: List of atom ids
    """
    return [match.getIndex() for match in distance_cell.query_atoms(*xyz)]