Source code for schrodinger.application.jaguar.autots_rmsd

""" methods for comparison of structures using rmsd """
# Contributors: Leif D. Jacobson

import collections
import hashlib
import itertools

import numpy as np
from networkx.algorithms import isomorphism

import schrodinger.application.jaguar.autots_constants as autots_constants
from schrodinger.application.jaguar import file_logger
from schrodinger.application.jaguar.autots_bonding import active_atom_pairs
from schrodinger.application.jaguar.autots_bonding import \
    copy_autots_st_properties
from schrodinger.application.jaguar.autots_bonding import get_mmlewis_bonding
from schrodinger.application.jaguar.autots_exceptions import \
    TemplateAtomMapFailure
from schrodinger.comparison import are_conformers
from schrodinger.comparison import are_isomers
from schrodinger.comparison import chirality
from schrodinger.comparison.atom_mapper import ACTIVE_ATOM
from schrodinger.comparison.atom_mapper import BaseAtomMapper
from schrodinger.comparison.atom_mapper import discard_lewis_data
from schrodinger.comparison.atom_mapper import _fill_map
from schrodinger.comparison.exceptions import ConformerError
from schrodinger.structutils import build
from schrodinger.structutils import rmsd
from schrodinger.structutils import transform

hashfunc = hashlib.sha256
MapScore = collections.namedtuple("MapScore", ["score", "atom_map"])


[docs]def reform_barely_broken_bonds( st1, st2, fraction_different=autots_constants.FRACTION_DIFFERENT, reform_always=False): """ Given two structures that have consistently ordered atoms, reform bonds that have not broken very much (stretched by less than fraction_different*d0). Return False if not all broken bonds can be reformed. Structures are run through mmlewis to get new bond orders and formal charges. :type st1: Structure :param st1: the first structure :type st2: Structure :param st2: the second structure :type fraction_different: float :param fraction_different: if a bond breaks then it is also required that the bond distance increases by a factor of fraction_different*d0 where d0 is the bond distance before breaking :type reform_always: bool :param reform_always: If True, reform barely broken bonds even when doing so will not make the two structures into conformers. If False, only reform such bonds if doing so would render the two structures conformers. """ active_pairs = active_atom_pairs(st1, st2) are_conf = True if active_pairs: operations = collections.defaultdict(list) for i, j in active_pairs: dij_st1 = st1.measure(i, j) dij_st2 = st2.measure(i, j) # bonded structure will have a smaller distance if dij_st1 < dij_st2: frac = (dij_st2 - dij_st1) / dij_st1 st_broken = st2 else: frac = (dij_st1 - dij_st2) / dij_st2 st_broken = st1 # if change is within tolerance, add an operation to dict # else we declare the structures to not be conformers if frac <= fraction_different: operations[st_broken].append((i, j, 1)) else: are_conf = False # re-bond broken bonds if all changes are within tolerance if are_conf or reform_always: for st in operations: v = operations[st] if v: st.addBonds(v) get_mmlewis_bonding(st) return are_conf
[docs]def molecule_lists_are_conformers(m1, m2, same_molecularity=True): """ Compare two non-empty lists of structures and return True if matching pairs of conformers can be found. The structures in m1 and m2 are assumed to be fully connected (molecules) if same_molecularity==True. Returns False if the number of molecules differs and same_molecularity==True. :type m1: list of structures :param m1: first list of molecules :type m2: list of structures :param m2: first list of molecules :type same_molecularity: bool :param same_molecularity: if True, require that the two lists be the same length (i.e. number of molecules) :rtype: bool :return: whether or not the two lists of molecules are conformers """ if same_molecularity and len(m1) != len(m2): return False assert len(m1) * len(m2) > 0 all_st = [] for st_list in [m1, m2]: st_all = st_list[0].copy() for st in st_list[1:]: st_all.extend(st) all_st.append(st_all) conformers = are_isomers(*all_st) if conformers: mapper = AutoTSAtomMapper(optimize_mapping=False, use_chirality=False) atlist = list(range(1, all_st[0].atom_total + 1)) conformers = mapper.are_conformers(all_st[0], atlist, all_st[1], atlist) return conformers
[docs]def align_all_molecules(st1, st2, sort_rms=True): """ Inspects whether or not all the molecules in reactant are conformers of those in product. If they are, each molecule in product is superimposed onto the corresponding reactant molecule. :type st1: schrodinger.structure.Structure instance :param st1: structure 1 :type st2: schrodinger.structure.Structure instance :param st2: structure 2 :type sort_rms: boolean :param sort_rms: If True the structure in which the inter-molecule distances are larger is superimposes onto the smaller, see sort_by_centroid_distance, which does the sorting. """ if are_conformers(st1, st2, use_lewis_structure=False): if sort_rms: destination, source = sort_by_centroid_distance([st1, st2]) else: source = st2 destination = st1 for mol in st1.molecule: atlist = mol.getAtomIndices() rmsd.superimpose(destination, atlist, source, atlist, move_which=rmsd.MOLECULES)
[docs]def sort_by_centroid_distance(strs): """ Given a list of structures sort the list in non-decreasing order using the rms distance between molecules in the structures. e.g. for two structures which are each water dimers, the water dimer with the smaller intermolecular distance would appear first in the list :type strs: list of schrodinger.structure.Structure instances :param strs: list of structures to be sorted :rtype: list of schrodinger.structure.Structure instances :return: sorted list """ nstrs = len(strs) dists = [0.0 for i in range(nstrs)] for ii, st in enumerate(strs): nmol = len(st.molecule) if nmol > 1: mol_list = [] for mol in st.molecule: mol_list.append(mol.extractStructure()) for i in range(nmol): moli = mol_list[i] xi, yi, zi, junk = transform.get_centroid(moli) for j in range(i + 1, nmol): molj = mol_list[j] xj, yj, zj, junk = transform.get_centroid(molj) dists[ii] += (xi - xj)**2.0 + (yi - yj)**2.0 + (zi - zj)**2.0 dists[ii] /= float(nmol) ind = np.argsort(dists) out_strs = [] for i in ind: out_strs.append(strs[i]) return out_strs
[docs]def align_path_strs(path_strs): """ Align the structures in a path. :type path_strs: list of structures :param path_strs: the structures along the path """ if not path_strs: return atlist = [i + 1 for i in range(path_strs[0].atom_total)] # this imposes structure 1 on 0, 2 on 1, 3 on 2, etc. for st1, st2 in zip(path_strs[1:], path_strs[:-1]): rms = rmsd.superimpose(st2, atlist, st1, atlist, move_which=rmsd.CT)
[docs]def order_atoms(reactant, product, debug=False): """ Renumber atoms in reactant and product in a consistent fashion. :type reactant: schrodinger.structure.Structure :param reactant: reactant structure :type product: schrodinger.structure.Structure :param product: reactant structure :rtype: tuple of two structures :return: renumbered reactant and product """ r_copy = reactant.copy() p_copy = product.copy() # first try using chirality atlist = range(1, r_copy.atom_total + 1) mapper = AutoTSAtomMapper(optimize_mapping=True, use_chirality=True, debug=debug) try: r_new, p_new = mapper.reorder_structures(r_copy, atlist, p_copy, atlist) except ConformerError as e: mapper = AutoTSAtomMapper(optimize_mapping=True, use_chirality=False, debug=debug) r_new, p_new = mapper.reorder_structures(r_copy, atlist, p_copy, atlist) copy_autots_st_properties(reactant, r_new) copy_autots_st_properties(product, p_new) return r_new, p_new
[docs]class AutoTSAtomMapper(BaseAtomMapper): """ Atom mapper used by AutoTS for reordering atoms of conformers. Uses element types and optionally chirality to equate atoms and all bonds are considered equivalent. Stereochemistry on atoms which are marked with the property %s are ignored when comparing stereochemistry. This property is used to mark the atoms in the reaction center. Usage example: mapper = AutoTSAtomMapper(optimize_mapping=True, use_chirality=False) st1, st2 = mapper.reorder_atoms(st1, range(1, st1.atom_total + 1), st2, range(st2.atom_total + 1)) """ % autots_constants.IGNORE_ATOM_STEREO RMSD_THRESH = 1.0e-8
[docs] def __init__(self, optimize_mapping, use_chirality, debug=False): """ :type optimize_mapping: boolean :param optimize_mapping: if True search over all bijections to find the one with lowest score :type use_chirality: boolean :param use_chirality: if True, in addition to element type use chirality (where defined) to equate atoms. """ super(AutoTSAtomMapper, self).__init__(optimize_mapping, debug) self.use_chirality = use_chirality
[docs] def initialize_atom_types(self, st, invert_stereocenters=False): """ Initialize the atom types :type st: Structure :param st: structure containing atoms :type invert_stereocenters: boolean :param invert_stereocenters: whether or not R/S labels should be flipped """ def atom_label(atom): """ creates a label which is typically just the element name but also contains a special atom class integer if the atom has the property %s """ % autots_constants.AUTOTS_ATOM_CLASS atom_class = atom.property.get( str(autots_constants.AUTOTS_ATOM_CLASS), "") label = atom.element if atom_class: label += "-class=%s" % atom_class return label # uses element-chirality if self.use_chirality: #Discard Lewis data before determining chirality st_copy = discard_lewis_data(st) ch = chirality.get_chirality(st_copy) if invert_stereocenters: self.invert_chirality(ch) for at in st.atom: # have to get rid of these atom number chiralities here # since not using Lewis structure, E/Z isomerism doesn't apply if ch[at.index - 1] in ["ANR", "ANS", "E", "Z"]: ch[at.index - 1] = None self.set_atom_type( at, atom_label(at) + "-chiral=%s" % ch[at.index - 1]) else: for at in st.atom: self.set_atom_type(at, atom_label(at))
[docs] def score_mapping(self, st1, st2, atset): """ Score a mapping over the atoms listed in atlist. This returns a number of chirality mis-alignments and an rms mis-alignments are neglected for atoms which are marked with atom property %s :type st1: Structure :param st1: first structure :type st2: Structure :param st2: second structure :type atset: set of ints :param atset: set of atoms that have been mapped (indexes refer to both structures) :rtype: 5-tuple of (negative) int, (non-negative) int, (non-positive) int, (positive) int, and real :return: the negative of the number of active bonds, the number of misaligned atom-numbering stereocenters, the negative of the number of active atoms, the number of separate components, and the rmsd """ % autots_constants.IGNORE_ATOM_STEREO atlist = list(atset) # move a copy st1_working = st1.copy() st2_working = st2.copy() rms = rmsd.superimpose_substructure_molecules(st1_working, atlist, st2_working, atlist) # according to cProfile this is the bottleneck ch1 = chirality.get_atom_numbering_chirality(st1_working) ch2 = chirality.get_atom_numbering_chirality(st2_working) misalignments = 0 # only compare chirality for atoms which have all their # neighbors mapped atlist_neighbors_mapped = [ i for i in atlist if all( [at.index in atset for at in st1_working.atom[i].bonded_atoms]) ] for i in atlist_neighbors_mapped: ignore_stereo = st1.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False) or \ st2.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False) if (chirality.valid_chiral_comp(ch1[i - 1], ch2[i - 1]) and ch1[i - 1] != ch2[i - 1]) and not ignore_stereo: misalignments += 1 # In order to encourage larger n-membered TS's, we prefer solutions that # increase the number of active atoms (even at the expense of RMSD). # We include the negative of the number of active atoms to maximize this when # we take the minimum score later. # renumber.py has added this property to the active atoms for the purposes of # renumbering. It also handles removing the label when renumbering is complete. active_indices = set() for i in atlist: if st1.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False) or \ st2.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False): active_indices.add(i) n_active_bonds, n_components = self.determine_active_bonds_info( st1, st2, atlist) score = (-n_active_bonds, misalignments, -len(active_indices), n_components, rms) if self.debug: ignored_atoms = [] for i in atlist_neighbors_mapped: if st1.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False) or \ st2.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False): ignored_atoms.append(i) print("ignoring stereo for the following atoms", ignored_atoms) st1_working.append("scoring.mae") st2_working.append("scoring.mae") file_logger.register_file("scoring.mae") print("score", score) print("misalignments:") for i in atlist_neighbors_mapped: if ch1[i - 1] != ch2[i - 1]: print(i, ch1[i - 1], ch2[i - 1]) return score
[docs] def score_is_equivalent(self, score1, score2): """ Here we declare 2 scores equivalent if the same number of chirality mismatches are present and the rmsd difference is within RMSD_THRESH. This resolves machine dependent descrepancies in the chosen map. :type score1: tuple :param score1: the first score (chirality mismatches, active atoms, rms) :type score2: tuple :param score2: the first score (chirality mismatches, active atoms, rms) :return: boolean indicating if the score is the same or different """ same_int_scores = score1[:4] == score2[:4] same_rms = abs(score1[4] - score2[4]) < self.RMSD_THRESH return same_int_scores and same_rms
[docs] def determine_active_bonds_info(self, st1, st2, atlist): """ Determine the number of active bonds after numbering and the number of components. It is possible for the numbering code to "deactivate" an active bond and such numbering should be considered invalid. The way this might happen is for the numbering to lead to two active bonds to be numbered equivalently. Say you have a reactant water with an O-H active and a product water with an O-H active (for example, in a water wire). The numbering code could decide to number both of these O's the same and both H's the same. Thus, the code thinks it has both O-H's active but, in reality, now neither are because the equivalently numbered atoms have bonds in both reactant and product. The active bonds have been stored as a binary vector (i.e. an int). An active bond- destroying numbering will have the same two atom numbers as the endpoint of more than one active bond. We define the number of 'components' as the number of disjoint sets of molecules that are involved in active bonds. That is, if all active bonds in reactants and products were drawn in one structure, how many "molecules" would there be? For example, it is possible to number a set of bonds in the reaction A+B+C+D -> E+F, where A+B->E and C+D->F or one in which atoms from all 4 reactant molecules appear among all 2 of the products. The former case is a 2-component reaction (consisting of (A, B, E) and (C, D, F)) and the latter is a 1-component reaction (A, B, C, D, E ,F). We assume that the user put in an irreducible reaction, and hence fewer component renumbered solutions are to be preferred. :type st1: Structure :param st1: first structure :type st2: Structure :param st2: second structure :type atlist: list of ints :param atlist: list of atoms that have been mapped (indexes refer to both structures) :rtype: (int, int) :return: number of active bonds from this numbering, number of components """ def get_set_bits(bond_bitstring): """ Get the bits that are set in the int listing the active bonds We check if the last bit is set (by anding with 1) and, if so, append its position to a list. We then bit-shift the entire int to the right (which deletes the bit just checked and moves the next-to-last bit to the last bit). We continue checking and shifting until the bitstring is exhausted (i.e. there are no bits set anywhere, which happens when the bitstring is 0) """ output = [] counter = 0 while bond_bitstring != 0: if bond_bitstring & 1: output.append(counter) bond_bitstring = bond_bitstring >> 1 counter += 1 return output a_bond_list = collections.defaultdict(list) for at in atlist: for st in [st1, st2]: bond_bitstring = st.atom[at].property.get( autots_constants.BOND_BITSTRING, 0) for bond in get_set_bits(bond_bitstring): a_bond_list[bond].append(at) bonds = set() # Make sure all active bonds found are valid for bond in a_bond_list.values(): if len(bond) == 2: bonds.add(tuple(sorted(bond))) st_all = st1.copy() st_all.addBonds([(i, j, 1) for i, j in bonds]) return len(bonds), st_all.mol_total
[docs]class AutoTSTemplateAtomMapper(AutoTSAtomMapper): """ Atom mapper used by AutoTS's template code for reordering atoms to align template and input. Uses element types to equate atoms and all bonds are considered equivalent. Stereochemistry on atoms which are marked with the property %s are ignored when comparing stereochemistry. This differs from the AutoTSAtomMapper in that it simultaneously renumbers 4 structures instead of 2, assuming two pairs of those structures are already consistently numbered. Moreover, the return value of this class is the optimal map that accomplishes the renumbering, as opposed to the renumbered Structures themselves. Usage example: mapper = AutoTSTemplateAtomMapper(optimize_mapping=True, use_chirality=False) map = mapper.choose_template_map(reactant, product, input_indexes, r_template, p_template, template_indexes) """ % autots_constants.IGNORE_ATOM_STEREO RMSD_THRESH = 1.0e-8
[docs] def initialize_atom_types(self, st, active_ats, invert_stereocenters=False): """ Initialize the atom types :type st: Structure :param st: structure containing atoms :type active_ats: list of ints :param active_ats: atom indices of the reaction center :type invert_stereocenters: boolean :param invert_stereocenters: whether or not R/S labels should be flipped """ for at in st.atom: if autots_constants.AUTOTS_ATOM_CLASS in at.property: del at.property[autots_constants.AUTOTS_ATOM_CLASS] super().initialize_atom_types(st, invert_stereocenters) for at in active_ats: at_type = self.get_atom_type(st.atom[at]) at_type += ACTIVE_ATOM self.set_atom_type(st.atom[at], at_type)
def _order_atoms(self, st1, atset1, st2, atset2, st3, st4, input_rc, template_rc): """ Order atoms by first recursively removing topologically degenerate terminal atoms :type st1: Structure :param st1: the first structure :type atset1: set of ints :param atset1: set of atom indexes defining the first substructure :type st2: Structure :param st2: the second structure :type atset2: set of ints :param atset2: set of atom indexes defining the second substructure :type st3: Structure :param st3: the third structure, already assumed to be numbered consistently with st1 :type st4: Structure :param st4: the fourth structure, already assumed to be numbered consistently with st2 :type input_rc: set of ints :param input_rc: reaction center atom indices of st1, st3 :type template_rc: set of ints :param template_rc: reaction center atom indices of st2, st4 :return: the four structures with structure 2, 4 having had atoms reordered and the overall map which accomplished this """ st1_renorm, k1, e1 = self._eliminate_degenerate_terminal_atoms( st1, atset1) st2_renorm, k2, e2 = self._eliminate_degenerate_terminal_atoms( st2, atset2) st3_renorm, k3, e3 = self._eliminate_degenerate_terminal_atoms( st3, atset1) st4_renorm, k4, e4 = self._eliminate_degenerate_terminal_atoms( st4, atset2) # check elimination for similarity or if we tried to eliminate the rxn center if not (self.isomeric_atom_sets(st1, e1, st2, e2) and self.isomeric_atom_sets(st2, e2, st3, e3) and self.isomeric_atom_sets(st3, e3, st4, e4)) or len( e1.intersection(input_rc)) > 0 or len( e2.intersection(template_rc)) > 0: e1.clear() e2.clear() e3.clear() e4.clear() # if we've eliminated any terminal atoms recurse if e1 and e2 and e3 and e4: st1_reord, st2_reord, st3_reord, st4_reord, overall_map = self._order_atoms( st1_renorm, k1, st2_renorm, k2, st3_renorm, st4_renorm, input_rc, template_rc) # order eliminated atoms # because st1-st3 (r and p) and st2-st4 (r_template and p_template) are consistently numbered # one actually need only consider st1 and st2 and then apply the same map selection to the # other two st. st1_reord, st2_reord, st3_reord, st4_reord, app_map = self._append_terminal_atoms( st1_reord, e1, st2_reord, e2, st3_reord, st4_reord, k1) overall_map.update(app_map) # order core else: st1_reord, st2_reord, st3_reord, st4_reord, overall_map = self._order_core_atoms( st1, atset1, st2, atset2, st3, st4, input_rc, template_rc) return st1_reord, st2_reord, st3_reord, st4_reord, overall_map def _append_terminal_atoms(self, st1, term1, st2, term2, st3, st4, core): """ Map the terminal atoms in the list term1 and term2. The atom indexes in the list core should be consistently ordered between st1 and st2. :type st1: Structure :param st1: the first structure :type term1: set of ints :param term1: set of terminal atom indexes in st1 :type st2: Structure :param st2: the second structure :type term2: set of ints :param term2: set of terminal atom indexes in st2 :type st3: Structure :param st3: the third structure, already assumed to be numbered consistently with st1 :type st4: Structure :param st4: the fourth structure, already assumed to be numbered consistently with st2 :type core: set of atoms in core structure which are already numbered consistently :return: four new structures, map """ # keeps track of which atoms have been mapped overall_map = dict() have_mapped = core.copy() for icore in core: types1 = self._group_neighbors(st1.atom[icore], term1) types2 = self._group_neighbors(st2.atom[icore], term2) # check that name of groups and lengths are the same group_cnts1 = {grp: len(types1[grp]) for grp in types1} group_cnts2 = {grp: len(types2[grp]) for grp in types2} if group_cnts1 != group_cnts2: raise ConformerError( "Groups are not the same when appending terminal atoms") # each group has a list of mappings group_mappings = collections.defaultdict(list) for group in types1: for p in itertools.permutations(types2[group], len(types2[group])): group_mappings[group].append(list(zip(types1[group], p))) # the set of atoms getting mapped mapped_atoms = set() for lst in types1.values(): mapped_atoms.update(lst) # cartesian product of all groups to get # all mappings of this atoms neighbors best_score = None best_maps = [] for mapping in itertools.product(*group_mappings.values()): # get a mapping of terminal atoms attached to this core full_list = [] for group_mapping in mapping: full_list.extend(group_mapping) # set initial map to None full_map = [None for i in range(st2.atom_total)] # map everything that has been mapped for j in have_mapped: full_map[j - 1] = j # map this grouping of atoms for group_mapping in mapping: for i, j in group_mapping: full_map[i - 1] = j # fill in Nones _fill_map(full_map) # RMS is computed over core atom and added terminals # whereas chirality need only be computed for the new core atom # this is because chirality is determined by core + terminal st2_reord = build.reorder_atoms(st2, full_map) st4_reord = build.reorder_atoms(st4, full_map) all_mapped = have_mapped.union(mapped_atoms) score_r = self.score_mapping(st1, st2_reord, all_mapped) score_p = self.score_mapping(st3, st4_reord, all_mapped) score = tuple(map(sum, zip(score_r, score_p))) map_score = MapScore(score, full_map) self._update_map_list(best_maps, map_score) if not self.optimize_mapping: break ## now pick the best map. If there are ties, choose (entirely arbitrarily) ## the one that preserves the relative ordering of the atoms being reordered ## as best as possible if best_maps: best_score, best_mapping = min(best_maps, key=lambda bij: bij.atom_map) # save the overall map for at in mapped_atoms: overall_map[at] = st2.atom[best_mapping[at - 1]].property[ self.MAPPER_INDEX] if self.debug: if best_maps: print("Best app maps") for mapping in best_maps: print(str(mapping.score)) print("overall map in append", overall_map) st2 = build.reorder_atoms(st2, best_mapping) st4 = build.reorder_atoms(st4, best_mapping) # we just mapped terminal atoms have_mapped.update(mapped_atoms) return st1, st2, st3, st4, overall_map def _order_core_atoms(self, st1, atset1, st2, atset2, st3, st4, input_rc, template_rc): """ re-order the atoms atset2 in st2/st4 to be consistent with atset1 in st1/st3. Graphs made from the atom indexes listed in atset1 and atset2 must be isomorphic. :type st1: Structure :param st1: the first structure :type atset1: set of ints :param atset1: set of atom indexes defining the first substructure :type st2: Structure :param st2: the second structure :type atset2: list of ints :param atset2: list of atom indexes defining the second substructure :type st3: Structure :param st3: the third structure, already assumed to be numbered consistently with st1 :type st4: Structure :param st4: the fourth structure, already assumed to be numbered consistently with st2 :type input_rc: set of ints :param input_rc: reaction center atom indices of st1, st3 :type template_rc: set of ints :param template_rc: reaction center atom indices of st2, st4 :return: the four structures with structure 2, 4 having had atoms reordered and the overall map which accomplished this """ if len(atset1) != len(atset2): raise ConformerError("length of atom lists in core region differ") r_graph = self.st_to_graph(st1, atset1) r_template_graph = self.st_to_graph(st2, atset2) p_graph = self.st_to_graph(st3, atset1) p_template_graph = self.st_to_graph(st4, atset2) r_matcher = isomorphism.GraphMatcher(r_graph, r_template_graph, self.comparator) p_matcher = isomorphism.GraphMatcher(p_graph, p_template_graph, self.comparator) best_maps = [] overall_map = dict() for trial_map in _template_map_iter(r_matcher, p_matcher, input_rc, template_rc): atom_map = [None for i in range(st2.atom_total)] for k, v in trial_map.items(): atom_map[k - 1] = v _fill_map(atom_map) r_t_reord = build.reorder_atoms(st2, atom_map) p_t_reord = build.reorder_atoms(st4, atom_map) score_r = self.score_mapping(st1, r_t_reord, atset1) score_p = self.score_mapping(st3, p_t_reord, atset1) score = tuple(map(sum, zip(score_r, score_p))) map_score = MapScore(score, atom_map) self._update_map_list(best_maps, map_score) if best_maps: best_score, best_mapping = min(best_maps, key=lambda bij: bij.atom_map) else: raise TemplateAtomMapFailure r_t_reord = build.reorder_atoms(st2, best_mapping) p_t_reord = build.reorder_atoms(st4, best_mapping) for i in atset1: overall_map[i] = best_mapping[i - 1] if self.debug: print("best_score", best_score) print(overall_map) return st1, r_t_reord, st3, p_t_reord, overall_map
[docs] def choose_template_map(self, reactant, product, input_indexes, r_template, p_template, template_indexes): """ Determine the optimal map from the input reactant and product structures to the template reactant and product structures. :type reactant: Structure instance :param reactant: input reactant structure :type product: Structure instance :param product: input product structure :type input_indexes: RxnIndxDecomp instance :param input_indexes: breakdown of input atom indexes into core, reaction center :type r_template: Structure instance :param r_template: template reactant structure :type p_template: Structure instance :param p_template: template product structure :type template_indexes: RxnIndxDecomp instance :param template_indexes: breakdown of template atom indexes into core, reaction center :rtype: dict :return: mapping from input number to template numbering """ r_working = reactant.copy() p_working = product.copy() r_t_working = r_template.copy() p_t_working = p_template.copy() for sts, indexes in zip( [[r_working, p_working], [r_t_working, p_t_working]], [input_indexes, template_indexes]): for st in sts: self.initialize_atom_types(st, indexes.rxn_center) for at in indexes.rxn_center: st.atom[at].property[ autots_constants.IGNORE_ATOM_STEREO] = True for at in st.atom: at.property[self.MAPPER_INDEX] = at.index # Because our scoring works by reordering the structures, the actual map # determination must involve the system with more atoms being mapped onto the # system with fewer atoms (since you can't reorder the system with fewer atoms to # have atom indices greater than its number of atoms. # We simply reverse the map if need be to ensure a map from input to template if r_working.atom_total <= r_t_working.atom_total: r_working, r_t_working, p_working, p_t_working, overall_map = self._order_atoms( r_working, set(input_indexes.core), r_t_working, set(template_indexes.core), p_working, p_t_working, set(input_indexes.rxn_center), set(template_indexes.rxn_center)) if self.debug: r_t_working.write("rt_reord.mae") file_logger.register_file("rt_reord.mae") p_t_working.write("pt_reord.mae") file_logger.register_file("pt_reord.mae") else: r_t_working, r_working, p_t_working, p_working, reverse_map = self._order_atoms( r_t_working, set(template_indexes.core), r_working, set(input_indexes.core), p_t_working, p_working, set(template_indexes.rxn_center), set(input_indexes.rxn_center)) overall_map = {v: k for k, v in reverse_map.items()} if self.debug: r_working.write("rt_reord_rev.mae") file_logger.register_file("rt_reord_rev.mae") p_working.write("pt_reord_rev.mae") file_logger.register_file("pt_reord_rev.mae") if self.debug: print("final overall_map", overall_map) return overall_map
def _template_map_iter(r_matcher, p_matcher, input_rc, template_rc): """ Iterator over valid template isomorphism atom maps. Valid atom maps must exist between both reactants and product and map the reaction center to the reaction center. Since template r and p are consistently numbered and input r and p are consistently numbered, only maps that appear in both lists are good maps to use. That is, symmetry breaking may only occur on one side, a map that appears in one may not take this symmetry breaking into account. :type r_matcher: GraphMatcher instance :param r_matcher: iterator over isomorphic mappings of the input reactant to template reactant :type p_matcher: GraphMatcher instance :param p_matcher: iterator over isomorphic mappings of the input product to template product :type input_rc: list of ints :param input_rc: atom indices of input reaction center :type template_rc: list of ints :param template_rc: atom indices of template reaction center """ def image(mapping, domain): """ return image of a map given domain """ return set(v for k, v in mapping.items() if k in domain) p_maps = set( frozenset(isomorph.items()) for isomorph in p_matcher.isomorphisms_iter() if image(isomorph, input_rc) == set(template_rc)) for isomorph in r_matcher.isomorphisms_iter(): if frozenset(isomorph.items()) in p_maps: yield isomorph