Source code for schrodinger.application.matsci.reorder

"""
Creates a frame that contains widgets to aid in reordering two structures to
have the same atom order, plus functions to estimate atom ordering.

Copyright Schrodinger, LLC. All rights reserved.
"""

import warnings

from schrodinger.application.matsci import clusterstruct
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils import rmsd
from schrodinger.structutils import smiles


[docs]def map_hydrogens(struct1, struct2, atom_map):
    """
    For all heavy atoms already mapped, if they only have a single hydrogen
    attached to them we can also map that hydrogen.

    :type struct1: `schrodinger.structure.Structure`
    :param struct1: The first structure

    :type struct2: `schrodinger.structure.Structure`
    :param struct2: The structure to compare to the first structure

    :type atom_map: dict
    :param atom_map: keys are atom indexes for struct1, values are atom indexes
        for struct2.  Each key, value pair represents an atom that is the same atom
        in both structures. No atoms in this dictionary will be added to the
        dictionary returned by this method.

    :rtype: dict
    :return: dictionary, keys are indexes of H atoms in struct1 that were mapped
        by this method, each value is the index of a struct2 H atom that maps to the
        key atom.
    """

    def bonded_unmapped_protons(atom, mapped_atoms):
        protons = []
        for neighbor in atom.bonded_atoms:
            if neighbor.element == 'H' and neighbor.index not in mapped_atoms:
                protons.append(neighbor.index)
        return protons

    h_map = {}
    mapped_comp_atoms = set(atom_map.values())
    for index1, index2 in atom_map.items():
        atom1 = struct1.atom[index1]
        atom2 = struct2.atom[index2]
        protons1 = bonded_unmapped_protons(atom1, set(list(atom_map)))
        protons2 = bonded_unmapped_protons(atom2, set(atom_map.values()))
        if len(protons1) == len(protons2):
            if len(protons1) == 1:
                key = protons1[0]
                comp = protons2[0]
                if key not in atom_map and comp not in mapped_comp_atoms:
                    h_map[key] = comp
            elif len(protons1) > 1:
                h_map.update(
                    map_neighboring_hydrogens(struct1, struct2, atom1, protons1,
                                              protons2, atom_map))
    return h_map


[docs]def map_neighboring_hydrogens(struct1,
                              struct2,
                              atom,
                              protons1,
                              protons2,
                              atom_map,
                              threshold=10.):
    """
    If two or more hydrogens are bonded to an atom, we have to look at the
    similarities of the dihedrals between the structures to map them from
    struct2 to struct1

    :type struct1: `schrodinger.structure.Structure`
    :param struct1: The first structure

    :type struct2: `schrodinger.structure.Structure`
    :param struct2: The structure to compare to the first structure

    :type atom: `schrodinger.structure._StructureAtom`
    :type atom: The atom the reference protons are bonded to - the index of this
        atom must be a key in atom_map

    :type protons1: list
    :param protons1: list of atom indexes of protons bound to atom in struct1

    :type protons2: list
    :param protons2: list of atom indexes of protons bound to atom in struct2

    :type atom_map: dict
    :param atom_map: keys are atom indexes for struct1, values are atom indexes
        for struct2.  Each key, value pair represents an atom that is the same atom
        in both structures

    :type threshold: float
    :param threshold: The dihedral angle difference threshold for considering
        protons "the same" between the two structures (degrees).

    :rtype: dict
    :return: dictionary, keys are indexes of H atoms in struct1 that were mapped
        by this method, each value is the index of a struct2 H atom that maps to the
        key atom.
    """

    def mapped_heavy_neighbors(myatom):
        # Return a list of all heavy atoms bound to myatom that are mapped
        heavies = []
        for neighbor in myatom.bonded_atoms:
            if neighbor.element != 'H' and neighbor.index in atom_map:
                heavies.append(neighbor)
        return heavies

    def add_dihedral_heavies(nlist):
        # Add atoms in nlist to the list of dihedral atoms unless they are
        # already in the list
        new = [(x, x.index) for x in nlist if x.index not in dihedral_indexes]
        while len(dihedral_heavies) < 3 and new:
            new_atom, new_index = new.pop(0)
            dihedral_heavies.append(new_atom)
            dihedral_indexes.add(new_index)

    # Only attempt to map if there are the same number of unmapped hydrogens in
    # both structures (MATSCI-961, MATSCI-966)
    if len(protons1) != len(protons2):
        return {}

    # First heavy atom in the dihedral is atom itself
    dihedral_heavies = [atom]
    dihedral_indexes = {atom.index}

    # First attempt to use any heavy atoms directly bonded to atom
    neighbors = mapped_heavy_neighbors(atom)
    add_dihedral_heavies(neighbors)

    # If we don't have enough atoms for a dihedral, check for mapped neighbors
    # of any heavy atoms directly bonded to atom
    if len(dihedral_heavies) < 3:
        if neighbors:
            neighbors2 = mapped_heavy_neighbors(neighbors[0])
            add_dihedral_heavies(neighbors2)

    # Just add random mapped atoms until we get a satisfactory set of atoms
    if len(dihedral_heavies) < 3:
        for index in atom_map.keys():
            newatom = struct1.atom[index]
            if newatom.element != 'H':
                add_dihedral_heavies([newatom])
            if len(dihedral_heavies) == 3:
                break
        else:
            # Couldn't find enough heavy atoms
            return {}

    dihedral_heavies2 = [
        struct2.atom[atom_map[x.index]] for x in dihedral_heavies
    ]

    # Compute all the dihedral angles
    def dihedral_delta(torsion1, torsion2):
        # Calculate the delta between two dihedrals.  This is just the
        # difference between them unless each is near 180 and opposite in sign
        delta = abs(torsion1 - torsion2)
        # Catch instances near +-180
        if delta >= 360 - threshold:
            delta = abs(delta - 360)
        return delta

    angle = measure.measure_dihedral_angle
    hatoms1 = [struct1.atom[x] for x in protons1]
    dihedrals1 = [(x.index, angle(x, *dihedral_heavies)) for x in hatoms1]
    hatoms2 = [struct2.atom[x] for x in protons2]
    dihedrals2 = [(x.index, angle(x, *dihedral_heavies2)) for x in hatoms2]

    # Match dihedral angles that are within the threshold
    h_map = {}
    for index2, torsion2 in dihedrals2:
        for index1, torsion1 in dihedrals1:
            if dihedral_delta(torsion1, torsion2) <= threshold and \
               index1 not in atom_map and index2 not in atom_map.values():
                # Found a match between two unmapped hydrogens
                if index1 not in h_map and index2 not in h_map.values():
                    # We haven't paired either of these hydrogens yet
                    h_map[index1] = index2
    if len(h_map) == len(protons1):
        # A mapping was found for each proton attached to atom
        return h_map
    else:
        # We either map all the unmapped hydrogens attched to atom or none
        return {}


[docs]def find_atoms_near_point(struct, distance, xyz, dcell=None):
    """
    Find all atoms in struct that are within distance of point xyz. It is PBC
    aware.

    :type struct: `schrodinger.structure.Structure`
    :param struct: The structure containing the atoms to search.

    :type distance: float
    :param distance: Find all atoms within this distance in Angstroms

    :type xyz: list
    :param xyz: list of [x, y, z] coordinates

    :type dcell: `schrodinger.infra.structure.DistanceCell` or None
    :param dcell: The infrastructure DistanceCell created for input structure.
        If None, it will be created (can be slow).

    :rtype: list
    :return: list of atom indexes within distance of point xyz.  The list is
        sorted by distance with the index of the closest atom first.
    """

    if dcell is None:
        dcell, pbc = clusterstruct.create_distance_cell(struct, distance)

    neighbors = {}
    for match in dcell.query_atoms(*xyz):
        idx, distance = match.getIndex(), match.getDistanceSquared()

        # Keep only the shortest distance to the atom from xyz
        if ((idx in neighbors and neighbors[idx] > distance) or
                idx not in neighbors):
            neighbors[idx] = distance

    neighbors = sorted(neighbors, key=neighbors.get)
    return neighbors


[docs]def compare_molecular_formulas(struct1, struct2):
    """
    Check to ensure the molecular formulas of both structures are the same

    :type struct1: `schrodinger.structure.Structure`
    :param struct1: The first structure

    :type struct2: `schrodinger.structure.Structure`
    :param struct2: The structure to compare to the first structure

    :rtype: bool, str, str
    :return: True if the molecular formulas are the same, False if not, and then
        both molecular formulas
    """

    form1 = analyze.generate_molecular_formula(struct1)
    form2 = analyze.generate_molecular_formula(struct2)
    return form1 == form2, form1, form2


[docs]def map_by_lone_element(struct1, struct2, atom_map=None, check_formula=False):
    """
    Return a map of atoms in struct2 to atoms in struct1.  The only criteria
    used is that if only a single atom of an element exists in both structures,
    those atoms are mapped to each other.  All other atoms are unmapped.

    Note that this function assumes that a reaction might have taken place
    between the two structures, so just because there is a single F atom in both
    structures does not mean that the C atoms bound to the F is the same in both
    structures.

    :type struct1: `schrodinger.structure.Structure`
    :param struct1: The first structure

    :type struct2: `schrodinger.structure.Structure`
    :param struct2: The structure to compare to the first structure

    :type atom_map: dict
    :param atom_map: keys are atom indexes for struct1, values are atom indexes
        for struct2.  Each key, value pair represents an atom that is the same atom
        in both structures

    :type check_formula: bool
    :param check_formula: True if the molecular formulas should be checked to
        ensure they are identical, False if not

    :rtype: dict
    :return: A mapping of atom numbers in struct2 to atom numbers in struct1.
        keys are struct1 atom numbers, values are struct2 atom numbers. Dictionary
        contains only those atoms mapped by this method

    :raise ValueError: if the molecular formulas of the two structures are not
        equal and check_formula is not False
    """

    if check_formula:
        identical, form1, form2 = compare_molecular_formulas(struct1, struct2)
        if not identical:
            raise ValueError('The molecular formula of the first structure, %s'
                             'is not identical to the molecular formula of the '
                             'second structure, %s' % (form1, form2))
    elements = {}
    elements2 = {}
    if atom_map is None:
        atom_map = {}
    element_map = {}
    set2 = set(atom_map.values())

    def enumerate_elements(struct, mapped, new_map):
        """
        For any atom in struct that is not in mapped, add to a list in new_map.
        new_map is a dictionary keyed by atomic symbol whose values are atom
        indexes of that element
        """
        for atom in struct.atom:
            if atom.index not in mapped:
                new_map.setdefault(atom.element, []).append(atom.index)

    enumerate_elements(struct1, atom_map, elements)
    enumerate_elements(struct2, set2, elements2)
    # Find those elements for which there is only one atom in each struct
    for element, members in elements.items():
        if len(members) == 1:
            members2 = elements2.get(element, [])
            if len(members2) == 1:
                element_map[members[0]] = members2[0]
    return element_map


[docs]def map_by_smiles(struct1,
                  struct2,
                  atom_map=None,
                  also_map_hydrogens=True,
                  stereo=smiles.STEREO_FROM_ANNOTATION_AND_GEOM):
    """
    Return a map of atoms in struct2 to atoms in struct1.  Unique SMILES strings
    are generated for both structures - if they are the same, then a mapping is
    produced that maps the atom indexes of struct2 onto the atom indexes of
    struct 1. Protons are not mapped, except where only a single proton is
    attached to a mapped atom.

    :type struct1: `schrodinger.structure.Structure`
    :param struct1: The first structure

    :type struct2: `schrodinger.structure.Structure`
    :param struct2: The structure to compare to the first structure

    :type atom_map: dict
    :param atom_map: keys are atom indexes for struct1, values are atom indexes
        for struct2.  Each key, value pair represents an atom that is the same atom
        in both structures. No atoms in this dictionary are returned in the final
        mapping.

    :type also_map_hydrogens: bool
    :param also_map_hydrogens: By default, an attempt will be made to map
        hydrogen atoms based on the heavy atom mapping. Set to False to not do this.

    :type stereo: `smiles` module constant
    :param stereo: The stereo value to feed to the SMILES generator. By default
        this is smiles.STEREO_FROM_ANNOTATION_AND_GEOM, use smiles.NO_STEREO to
        generate SMILES strings with no stereo information

    :rtype: dict
    :return: A mapping of atom numbers in struct2 to atom numbers in struct1.
        keys are struct1 atom numbers values are struct2 atom numbers
    """

    if atom_map is None:
        atom_map = {}
    sgen = smiles.SmilesGenerator(stereo=stereo, unique=True)
    # mapx is a list of atom indicies in the same order as the smiles string
    smiles1, maplist1 = sgen.getSmilesAndMap(struct1)
    smiles2, maplist2 = sgen.getSmilesAndMap(struct2)
    if smiles1 != smiles2:
        return {}
    smiles_map = dict(list(zip(maplist1, maplist2)))
    for key, value in atom_map.items():
        try:
            if smiles_map[key] != value:
                # This SMILES map violates the already established mapping by
                # mapping a different comp atom to this ref atom
                return {}
            else:
                # This atom already appears in the atom map
                del smiles_map[key]
        except KeyError:
            # This atom isn't in the smiles map - it's probably a hydrogen
            pass
        if value in smiles_map.values():
            # This SMILES map violates the already established mapping by
            # mapping a different ref atom to this comp atom
            return {}
    if also_map_hydrogens:
        h_map = map_hydrogens(struct1, struct2, smiles_map)
        smiles_map.update(h_map)
    return smiles_map


[docs]def map_by_smarts(struct1, struct2, atom_map=None, also_map_hydrogens=True):
    """
    Return a map of atoms in struct2 to atoms in struct1. If the SMARTS pattern
    for each entire molecule matches, then atoms are set to that order.
    Otherwise, struct1 is searched for atoms that have a unique SMARTS pattern.
    If only one atom in struct2 also has that SMARTS pattern, the two atoms are
    mapped to each other. This is done for each unique pair in the two
    structures.

    :type struct1: `schrodinger.structure.Structure`
    :param struct1: The first structure

    :type struct2: `schrodinger.structure.Structure`
    :param struct2: The structure to compare to the first structure

    :type atom_map: dict
    :param atom_map: keys are atom indexes for struct1, values are atom indexes
        for struct2.  Each key, value pair represents an atom that is the same atom
        in both structures. No atoms in this dictionary are returned in the final
        mapping.

    :type also_map_hydrogens: bool
    :param also_map_hydrogens: By default, an attempt will be made to map
        hydrogen atoms based on the heavy atom mapping even if they are not mapped
        via SMARTS patterns. Set to False to not do this. Even if this paramter is
        False, hydrogens that uniquely match SMARTS patterns will be mapped.

    :rtype: dict
    :return: A mapping of atom numbers in struct2 to atom numbers in struct1.
        keys are struct1 atom numbers values are struct2 atom numbers. Dictionary
        contains only those atoms mapped by this method.

    :raise ValueError: if the SMILES strings of the two structures do not match
    """

    if atom_map is None:
        atom_map = {}
    sgen = smiles.SmilesGenerator(stereo=smiles.NO_STEREO,
                                  unique=True,
                                  wantAllH=True)

    def reorder_struct(struct):
        """
        Return a re-ordered copy of struct, the smiles string, and a dictionary
        whose keys are struct atom indexes and values are the atom indexes in
        the reordered copy
        """

        smiles, maplist = sgen.getSmilesAndMap(struct)
        numat = struct.atom_total
        # Remove any indexes that are higher than the total number of atoms.
        # These are H atoms that the SmilesGenerator added to fulfill what it
        # thinks are empty valences.
        maplist = [x for x in maplist if x <= numat]
        scopy = build.reorder_atoms(struct, maplist)
        smap = dict(enumerate(maplist, 1))
        return scopy, smiles, smap

    # First we need to reorder the structures by their own unique SMILES to
    # have any chance of matching SMARTS for the whole structure
    s1_reordered, s1_smiles, s1_remap = reorder_struct(struct1)
    s2_reordered, s2_smiles, s2_remap = reorder_struct(struct2)

    smarts_map = {}
    if s1_smiles == s2_smiles:
        # The same SMILES pattern for both structures, just map all the atoms
        # The remap dictionaries have {original_atom_index: SMILES atom index}
        # so we map the SMILES atom index for struct1 to the SMILES atom
        # index for struct2.
        smarts_map = {
            x: y for x, y in zip(s1_remap.values(), s2_remap.values())
        }
        for key, value in atom_map.items():
            if smarts_map[key] != value:
                # This SMARTS map violates the already established mapping by
                # mapping a different comp atom to this ref atom
                smarts_map = {}
                break
            else:
                del smarts_map[key]
            if value in smarts_map.values():
                # This SMARTS map violates the already established mapping by
                # mapping a different ref atom to this comp atom
                smarts_map = {}
                break
    if smarts_map:
        return smarts_map

    # Not a total structure SMARTS match, or an error occurred in which the
    # total SMARTS match didn't match the already mapped atoms, but maybe some
    # atoms will match.
    def fill_smarts_dict(struct, mapped):
        """
        :type struct: `schrodinger.structure.Structure`
        :param struct: The struct to evalute SMARTS with

        :type mapped: set
        :param mapped: The set of atoms already mapped in this structure

        :rtype: dict
        :return: A dictionary of SMARTS patterns (keys) and list of atoms
            with that pattern (values)
        """
        smarts = {}
        for atom in struct.atom:
            if atom.index not in mapped:
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore",
                                            category=DeprecationWarning,
                                            message="analyze.generate_smarts")
                    asmart = analyze.generate_smarts(struct, [atom.index])
                smarts.setdefault(asmart, []).append(atom.index)
        return smarts

    smap1 = fill_smarts_dict(struct1, set(list(atom_map)))
    smap2 = fill_smarts_dict(struct2, set(atom_map.values()))
    for pattern, members1 in smap1.items():
        if len(members1) == 1:
            members2 = smap2.get(pattern, [])
            if len(members2) == 1:
                smarts_map[members1[0]] = members2[0]
    if also_map_hydrogens:
        h_map = map_hydrogens(struct1, struct2, smarts_map)
        smarts_map.update(h_map)
    return smarts_map


[docs]def map_by_superposition(struct1,
                         struct2,
                         atom_map,
                         check_formula=False,
                         preserve_elements=True,
                         threshold=1.0,
                         also_map_hydrogens=True):
    """
    Return a map of atoms in struct2 to atoms in struct1.  struct2 is first
    superimposed on struct1 using the atom lists supplied. Then all atoms in
    struct2 that are within threshold distance of an atom in struct1 are mapped
    to the closest atom.

    Note that this function assumes that a reaction might have taken place
    between the two structures, so just because there is a single F atom in both
    structures does not mean that the C atoms bound to the F is the same in both
    structures.

    :type struct1: `schrodinger.structure.Structure`
    :param struct1: The first structure

    :type struct2: `schrodinger.structure.Structure`
    :param struct2: The structure to compare to the first structure

    :type atom_map: dict
    :param atom_map: keys are atom indexes for struct1, values are atom indexes
        for struct2.  Each key, value pair represents an atom that is the same atom
        in both structures. atom_map must be at least 3 atoms long - the
        superposition will be done using atom_map. No atoms in this dictionary are
        returned in the final mapping.

    :type check_formula: bool
    :param check_formula: True if the molecular formulas should be checked to
        ensure they are identical, False if not

    :type preserve_elements: bool
    :param preserve_elements: True if only atoms of the same element should be
        mapped to each other, False if not

    :type threshold: float
    :param threshold: Only atoms closer than this in Angstroms will be
        considered for mapping

    :type also_map_hydrogens: bool
    :param also_map_hydrogens: By default, an attempt will be made to map
        hydrogen atoms based on the heavy atom mapping even if they are not mapped
        via superposition. Set to False to not do this. Even if this paramter is
        False, hydrogens that map via superposition will be mapped.

    :rtype: dict
    :return: A mapping of atom numbers in struct2 to atom numbers in struct1.
        keys are struct1 atom numbers values are struct2 atom numbers

    :raise ValueError: if the molecular formulas of the two structures are not
        equal and check_formula is not False

    :raise RuntimeError: if atom_map is shorter than 3 atoms
    """

    if check_formula:
        identical, form1, form2 = compare_molecular_formulas(struct1, struct2)
        if not identical:
            raise ValueError('The molecular formula of the first structure, %s'
                             'is not identical to the molecular formula of the '
                             'second structure, %s' % (form1, form2))

    dcell, pbc = clusterstruct.create_distance_cell(struct1, threshold)

    struct2_posed = struct2.copy()
    rmsd.superimpose(struct1, list(atom_map), struct2_posed,
                     list(atom_map.values()))
    superpose_map = {}
    atoms1 = set(list(atom_map))
    atoms2 = set(atom_map.values())
    for atom in struct2_posed.atom:
        if atom.index in atoms2:
            # Already mapped
            continue
        for index in find_atoms_near_point(struct1,
                                           threshold,
                                           atom.xyz,
                                           dcell=dcell):
            if index in superpose_map or index in atoms1:
                continue
            if preserve_elements:
                neighbor = struct1.atom[index]
                if neighbor.element != atom.element:
                    continue
            superpose_map[index] = atom.index
            break
    if also_map_hydrogens:
        h_map = map_hydrogens(struct1, struct2, superpose_map)
        superpose_map.update(h_map)
    return superpose_map