Source code for schrodinger.application.matsci.reorder

"""
Creates a frame that contains widgets to aid in reordering two structures to
have the same atom order, plus functions to estimate atom ordering.

Copyright Schrodinger, LLC. All rights reserved.
"""

import warnings

from schrodinger.application.matsci import clusterstruct
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils import rmsd
from schrodinger.structutils import smiles


[docs]def map_hydrogens(struct1, struct2, atom_map): """ For all heavy atoms already mapped, if they only have a single hydrogen attached to them we can also map that hydrogen. :type struct1: `schrodinger.structure.Structure` :param struct1: The first structure :type struct2: `schrodinger.structure.Structure` :param struct2: The structure to compare to the first structure :type atom_map: dict :param atom_map: keys are atom indexes for struct1, values are atom indexes for struct2. Each key, value pair represents an atom that is the same atom in both structures. No atoms in this dictionary will be added to the dictionary returned by this method. :rtype: dict :return: dictionary, keys are indexes of H atoms in struct1 that were mapped by this method, each value is the index of a struct2 H atom that maps to the key atom. """ def bonded_unmapped_protons(atom, mapped_atoms): protons = [] for neighbor in atom.bonded_atoms: if neighbor.element == 'H' and neighbor.index not in mapped_atoms: protons.append(neighbor.index) return protons h_map = {} mapped_comp_atoms = set(atom_map.values()) for index1, index2 in atom_map.items(): atom1 = struct1.atom[index1] atom2 = struct2.atom[index2] protons1 = bonded_unmapped_protons(atom1, set(list(atom_map))) protons2 = bonded_unmapped_protons(atom2, set(atom_map.values())) if len(protons1) == len(protons2): if len(protons1) == 1: key = protons1[0] comp = protons2[0] if key not in atom_map and comp not in mapped_comp_atoms: h_map[key] = comp elif len(protons1) > 1: h_map.update( map_neighboring_hydrogens(struct1, struct2, atom1, protons1, protons2, atom_map)) return h_map
[docs]def map_neighboring_hydrogens(struct1, struct2, atom, protons1, protons2, atom_map, threshold=10.): """ If two or more hydrogens are bonded to an atom, we have to look at the similarities of the dihedrals between the structures to map them from struct2 to struct1 :type struct1: `schrodinger.structure.Structure` :param struct1: The first structure :type struct2: `schrodinger.structure.Structure` :param struct2: The structure to compare to the first structure :type atom: `schrodinger.structure._StructureAtom` :type atom: The atom the reference protons are bonded to - the index of this atom must be a key in atom_map :type protons1: list :param protons1: list of atom indexes of protons bound to atom in struct1 :type protons2: list :param protons2: list of atom indexes of protons bound to atom in struct2 :type atom_map: dict :param atom_map: keys are atom indexes for struct1, values are atom indexes for struct2. Each key, value pair represents an atom that is the same atom in both structures :type threshold: float :param threshold: The dihedral angle difference threshold for considering protons "the same" between the two structures (degrees). :rtype: dict :return: dictionary, keys are indexes of H atoms in struct1 that were mapped by this method, each value is the index of a struct2 H atom that maps to the key atom. """ def mapped_heavy_neighbors(myatom): # Return a list of all heavy atoms bound to myatom that are mapped heavies = [] for neighbor in myatom.bonded_atoms: if neighbor.element != 'H' and neighbor.index in atom_map: heavies.append(neighbor) return heavies def add_dihedral_heavies(nlist): # Add atoms in nlist to the list of dihedral atoms unless they are # already in the list new = [(x, x.index) for x in nlist if x.index not in dihedral_indexes] while len(dihedral_heavies) < 3 and new: new_atom, new_index = new.pop(0) dihedral_heavies.append(new_atom) dihedral_indexes.add(new_index) # Only attempt to map if there are the same number of unmapped hydrogens in # both structures (MATSCI-961, MATSCI-966) if len(protons1) != len(protons2): return {} # First heavy atom in the dihedral is atom itself dihedral_heavies = [atom] dihedral_indexes = {atom.index} # First attempt to use any heavy atoms directly bonded to atom neighbors = mapped_heavy_neighbors(atom) add_dihedral_heavies(neighbors) # If we don't have enough atoms for a dihedral, check for mapped neighbors # of any heavy atoms directly bonded to atom if len(dihedral_heavies) < 3: if neighbors: neighbors2 = mapped_heavy_neighbors(neighbors[0]) add_dihedral_heavies(neighbors2) # Just add random mapped atoms until we get a satisfactory set of atoms if len(dihedral_heavies) < 3: for index in atom_map.keys(): newatom = struct1.atom[index] if newatom.element != 'H': add_dihedral_heavies([newatom]) if len(dihedral_heavies) == 3: break else: # Couldn't find enough heavy atoms return {} dihedral_heavies2 = [ struct2.atom[atom_map[x.index]] for x in dihedral_heavies ] # Compute all the dihedral angles def dihedral_delta(torsion1, torsion2): # Calculate the delta between two dihedrals. This is just the # difference between them unless each is near 180 and opposite in sign delta = abs(torsion1 - torsion2) # Catch instances near +-180 if delta >= 360 - threshold: delta = abs(delta - 360) return delta angle = measure.measure_dihedral_angle hatoms1 = [struct1.atom[x] for x in protons1] dihedrals1 = [(x.index, angle(x, *dihedral_heavies)) for x in hatoms1] hatoms2 = [struct2.atom[x] for x in protons2] dihedrals2 = [(x.index, angle(x, *dihedral_heavies2)) for x in hatoms2] # Match dihedral angles that are within the threshold h_map = {} for index2, torsion2 in dihedrals2: for index1, torsion1 in dihedrals1: if dihedral_delta(torsion1, torsion2) <= threshold and \ index1 not in atom_map and index2 not in atom_map.values(): # Found a match between two unmapped hydrogens if index1 not in h_map and index2 not in h_map.values(): # We haven't paired either of these hydrogens yet h_map[index1] = index2 if len(h_map) == len(protons1): # A mapping was found for each proton attached to atom return h_map else: # We either map all the unmapped hydrogens attched to atom or none return {}
[docs]def find_atoms_near_point(struct, distance, xyz, dcell=None): """ Find all atoms in struct that are within distance of point xyz. It is PBC aware. :type struct: `schrodinger.structure.Structure` :param struct: The structure containing the atoms to search. :type distance: float :param distance: Find all atoms within this distance in Angstroms :type xyz: list :param xyz: list of [x, y, z] coordinates :type dcell: `schrodinger.infra.structure.DistanceCell` or None :param dcell: The infrastructure DistanceCell created for input structure. If None, it will be created (can be slow). :rtype: list :return: list of atom indexes within distance of point xyz. The list is sorted by distance with the index of the closest atom first. """ if dcell is None: dcell, pbc = clusterstruct.create_distance_cell(struct, distance) neighbors = {} for match in dcell.query_atoms(*xyz): idx, distance = match.getIndex(), match.getDistanceSquared() # Keep only the shortest distance to the atom from xyz if ((idx in neighbors and neighbors[idx] > distance) or idx not in neighbors): neighbors[idx] = distance neighbors = sorted(neighbors, key=neighbors.get) return neighbors
[docs]def compare_molecular_formulas(struct1, struct2): """ Check to ensure the molecular formulas of both structures are the same :type struct1: `schrodinger.structure.Structure` :param struct1: The first structure :type struct2: `schrodinger.structure.Structure` :param struct2: The structure to compare to the first structure :rtype: bool, str, str :return: True if the molecular formulas are the same, False if not, and then both molecular formulas """ form1 = analyze.generate_molecular_formula(struct1) form2 = analyze.generate_molecular_formula(struct2) return form1 == form2, form1, form2
[docs]def map_by_lone_element(struct1, struct2, atom_map=None, check_formula=False): """ Return a map of atoms in struct2 to atoms in struct1. The only criteria used is that if only a single atom of an element exists in both structures, those atoms are mapped to each other. All other atoms are unmapped. Note that this function assumes that a reaction might have taken place between the two structures, so just because there is a single F atom in both structures does not mean that the C atoms bound to the F is the same in both structures. :type struct1: `schrodinger.structure.Structure` :param struct1: The first structure :type struct2: `schrodinger.structure.Structure` :param struct2: The structure to compare to the first structure :type atom_map: dict :param atom_map: keys are atom indexes for struct1, values are atom indexes for struct2. Each key, value pair represents an atom that is the same atom in both structures :type check_formula: bool :param check_formula: True if the molecular formulas should be checked to ensure they are identical, False if not :rtype: dict :return: A mapping of atom numbers in struct2 to atom numbers in struct1. keys are struct1 atom numbers, values are struct2 atom numbers. Dictionary contains only those atoms mapped by this method :raise ValueError: if the molecular formulas of the two structures are not equal and check_formula is not False """ if check_formula: identical, form1, form2 = compare_molecular_formulas(struct1, struct2) if not identical: raise ValueError('The molecular formula of the first structure, %s' 'is not identical to the molecular formula of the ' 'second structure, %s' % (form1, form2)) elements = {} elements2 = {} if atom_map is None: atom_map = {} element_map = {} set2 = set(atom_map.values()) def enumerate_elements(struct, mapped, new_map): """ For any atom in struct that is not in mapped, add to a list in new_map. new_map is a dictionary keyed by atomic symbol whose values are atom indexes of that element """ for atom in struct.atom: if atom.index not in mapped: new_map.setdefault(atom.element, []).append(atom.index) enumerate_elements(struct1, atom_map, elements) enumerate_elements(struct2, set2, elements2) # Find those elements for which there is only one atom in each struct for element, members in elements.items(): if len(members) == 1: members2 = elements2.get(element, []) if len(members2) == 1: element_map[members[0]] = members2[0] return element_map
[docs]def map_by_smiles(struct1, struct2, atom_map=None, also_map_hydrogens=True, stereo=smiles.STEREO_FROM_ANNOTATION_AND_GEOM): """ Return a map of atoms in struct2 to atoms in struct1. Unique SMILES strings are generated for both structures - if they are the same, then a mapping is produced that maps the atom indexes of struct2 onto the atom indexes of struct 1. Protons are not mapped, except where only a single proton is attached to a mapped atom. :type struct1: `schrodinger.structure.Structure` :param struct1: The first structure :type struct2: `schrodinger.structure.Structure` :param struct2: The structure to compare to the first structure :type atom_map: dict :param atom_map: keys are atom indexes for struct1, values are atom indexes for struct2. Each key, value pair represents an atom that is the same atom in both structures. No atoms in this dictionary are returned in the final mapping. :type also_map_hydrogens: bool :param also_map_hydrogens: By default, an attempt will be made to map hydrogen atoms based on the heavy atom mapping. Set to False to not do this. :type stereo: `smiles` module constant :param stereo: The stereo value to feed to the SMILES generator. By default this is smiles.STEREO_FROM_ANNOTATION_AND_GEOM, use smiles.NO_STEREO to generate SMILES strings with no stereo information :rtype: dict :return: A mapping of atom numbers in struct2 to atom numbers in struct1. keys are struct1 atom numbers values are struct2 atom numbers """ if atom_map is None: atom_map = {} sgen = smiles.SmilesGenerator(stereo=stereo, unique=True) # mapx is a list of atom indicies in the same order as the smiles string smiles1, maplist1 = sgen.getSmilesAndMap(struct1) smiles2, maplist2 = sgen.getSmilesAndMap(struct2) if smiles1 != smiles2: return {} smiles_map = dict(list(zip(maplist1, maplist2))) for key, value in atom_map.items(): try: if smiles_map[key] != value: # This SMILES map violates the already established mapping by # mapping a different comp atom to this ref atom return {} else: # This atom already appears in the atom map del smiles_map[key] except KeyError: # This atom isn't in the smiles map - it's probably a hydrogen pass if value in smiles_map.values(): # This SMILES map violates the already established mapping by # mapping a different ref atom to this comp atom return {} if also_map_hydrogens: h_map = map_hydrogens(struct1, struct2, smiles_map) smiles_map.update(h_map) return smiles_map
[docs]def map_by_smarts(struct1, struct2, atom_map=None, also_map_hydrogens=True): """ Return a map of atoms in struct2 to atoms in struct1. If the SMARTS pattern for each entire molecule matches, then atoms are set to that order. Otherwise, struct1 is searched for atoms that have a unique SMARTS pattern. If only one atom in struct2 also has that SMARTS pattern, the two atoms are mapped to each other. This is done for each unique pair in the two structures. :type struct1: `schrodinger.structure.Structure` :param struct1: The first structure :type struct2: `schrodinger.structure.Structure` :param struct2: The structure to compare to the first structure :type atom_map: dict :param atom_map: keys are atom indexes for struct1, values are atom indexes for struct2. Each key, value pair represents an atom that is the same atom in both structures. No atoms in this dictionary are returned in the final mapping. :type also_map_hydrogens: bool :param also_map_hydrogens: By default, an attempt will be made to map hydrogen atoms based on the heavy atom mapping even if they are not mapped via SMARTS patterns. Set to False to not do this. Even if this paramter is False, hydrogens that uniquely match SMARTS patterns will be mapped. :rtype: dict :return: A mapping of atom numbers in struct2 to atom numbers in struct1. keys are struct1 atom numbers values are struct2 atom numbers. Dictionary contains only those atoms mapped by this method. :raise ValueError: if the SMILES strings of the two structures do not match """ if atom_map is None: atom_map = {} sgen = smiles.SmilesGenerator(stereo=smiles.NO_STEREO, unique=True, wantAllH=True) def reorder_struct(struct): """ Return a re-ordered copy of struct, the smiles string, and a dictionary whose keys are struct atom indexes and values are the atom indexes in the reordered copy """ smiles, maplist = sgen.getSmilesAndMap(struct) numat = struct.atom_total # Remove any indexes that are higher than the total number of atoms. # These are H atoms that the SmilesGenerator added to fulfill what it # thinks are empty valences. maplist = [x for x in maplist if x <= numat] scopy = build.reorder_atoms(struct, maplist) smap = dict(enumerate(maplist, 1)) return scopy, smiles, smap # First we need to reorder the structures by their own unique SMILES to # have any chance of matching SMARTS for the whole structure s1_reordered, s1_smiles, s1_remap = reorder_struct(struct1) s2_reordered, s2_smiles, s2_remap = reorder_struct(struct2) smarts_map = {} if s1_smiles == s2_smiles: # The same SMILES pattern for both structures, just map all the atoms # The remap dictionaries have {original_atom_index: SMILES atom index} # so we map the SMILES atom index for struct1 to the SMILES atom # index for struct2. smarts_map = { x: y for x, y in zip(s1_remap.values(), s2_remap.values()) } for key, value in atom_map.items(): if smarts_map[key] != value: # This SMARTS map violates the already established mapping by # mapping a different comp atom to this ref atom smarts_map = {} break else: del smarts_map[key] if value in smarts_map.values(): # This SMARTS map violates the already established mapping by # mapping a different ref atom to this comp atom smarts_map = {} break if smarts_map: return smarts_map # Not a total structure SMARTS match, or an error occurred in which the # total SMARTS match didn't match the already mapped atoms, but maybe some # atoms will match. def fill_smarts_dict(struct, mapped): """ :type struct: `schrodinger.structure.Structure` :param struct: The struct to evalute SMARTS with :type mapped: set :param mapped: The set of atoms already mapped in this structure :rtype: dict :return: A dictionary of SMARTS patterns (keys) and list of atoms with that pattern (values) """ smarts = {} for atom in struct.atom: if atom.index not in mapped: with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning, message="analyze.generate_smarts") asmart = analyze.generate_smarts(struct, [atom.index]) smarts.setdefault(asmart, []).append(atom.index) return smarts smap1 = fill_smarts_dict(struct1, set(list(atom_map))) smap2 = fill_smarts_dict(struct2, set(atom_map.values())) for pattern, members1 in smap1.items(): if len(members1) == 1: members2 = smap2.get(pattern, []) if len(members2) == 1: smarts_map[members1[0]] = members2[0] if also_map_hydrogens: h_map = map_hydrogens(struct1, struct2, smarts_map) smarts_map.update(h_map) return smarts_map
[docs]def map_by_superposition(struct1, struct2, atom_map, check_formula=False, preserve_elements=True, threshold=1.0, also_map_hydrogens=True): """ Return a map of atoms in struct2 to atoms in struct1. struct2 is first superimposed on struct1 using the atom lists supplied. Then all atoms in struct2 that are within threshold distance of an atom in struct1 are mapped to the closest atom. Note that this function assumes that a reaction might have taken place between the two structures, so just because there is a single F atom in both structures does not mean that the C atoms bound to the F is the same in both structures. :type struct1: `schrodinger.structure.Structure` :param struct1: The first structure :type struct2: `schrodinger.structure.Structure` :param struct2: The structure to compare to the first structure :type atom_map: dict :param atom_map: keys are atom indexes for struct1, values are atom indexes for struct2. Each key, value pair represents an atom that is the same atom in both structures. atom_map must be at least 3 atoms long - the superposition will be done using atom_map. No atoms in this dictionary are returned in the final mapping. :type check_formula: bool :param check_formula: True if the molecular formulas should be checked to ensure they are identical, False if not :type preserve_elements: bool :param preserve_elements: True if only atoms of the same element should be mapped to each other, False if not :type threshold: float :param threshold: Only atoms closer than this in Angstroms will be considered for mapping :type also_map_hydrogens: bool :param also_map_hydrogens: By default, an attempt will be made to map hydrogen atoms based on the heavy atom mapping even if they are not mapped via superposition. Set to False to not do this. Even if this paramter is False, hydrogens that map via superposition will be mapped. :rtype: dict :return: A mapping of atom numbers in struct2 to atom numbers in struct1. keys are struct1 atom numbers values are struct2 atom numbers :raise ValueError: if the molecular formulas of the two structures are not equal and check_formula is not False :raise RuntimeError: if atom_map is shorter than 3 atoms """ if check_formula: identical, form1, form2 = compare_molecular_formulas(struct1, struct2) if not identical: raise ValueError('The molecular formula of the first structure, %s' 'is not identical to the molecular formula of the ' 'second structure, %s' % (form1, form2)) dcell, pbc = clusterstruct.create_distance_cell(struct1, threshold) struct2_posed = struct2.copy() rmsd.superimpose(struct1, list(atom_map), struct2_posed, list(atom_map.values())) superpose_map = {} atoms1 = set(list(atom_map)) atoms2 = set(atom_map.values()) for atom in struct2_posed.atom: if atom.index in atoms2: # Already mapped continue for index in find_atoms_near_point(struct1, threshold, atom.xyz, dcell=dcell): if index in superpose_map or index in atoms1: continue if preserve_elements: neighbor = struct1.atom[index] if neighbor.element != atom.element: continue superpose_map[index] = atom.index break if also_map_hydrogens: h_map = map_hydrogens(struct1, struct2, superpose_map) superpose_map.update(h_map) return superpose_map