Source code for schrodinger.application.jaguar.autots_rmsd

""" methods for comparison of structures using rmsd """
# Contributors: Leif D. Jacobson

import collections
import hashlib
import itertools

import numpy as np
from networkx.algorithms import isomorphism

import schrodinger.application.jaguar.autots_constants as autots_constants
from schrodinger.application.jaguar import file_logger
from schrodinger.application.jaguar.autots_bonding import active_atom_pairs
from schrodinger.application.jaguar.autots_bonding import \
    copy_autots_st_properties
from schrodinger.application.jaguar.autots_bonding import get_mmlewis_bonding
from schrodinger.application.jaguar.autots_exceptions import \
    TemplateAtomMapFailure
from schrodinger.comparison import are_conformers
from schrodinger.comparison import are_isomers
from schrodinger.comparison import chirality
from schrodinger.comparison.atom_mapper import ACTIVE_ATOM
from schrodinger.comparison.atom_mapper import BaseAtomMapper
from schrodinger.comparison.atom_mapper import discard_lewis_data
from schrodinger.comparison.atom_mapper import _fill_map
from schrodinger.comparison.exceptions import ConformerError
from schrodinger.structutils import build
from schrodinger.structutils import rmsd
from schrodinger.structutils import transform

hashfunc = hashlib.sha256
MapScore = collections.namedtuple("MapScore", ["score", "atom_map"])


[docs]def reform_barely_broken_bonds(
        st1,
        st2,
        fraction_different=autots_constants.FRACTION_DIFFERENT,
        reform_always=False):
    """
    Given two structures that have consistently ordered atoms,
    reform bonds that have not broken very much (stretched by less than
    fraction_different*d0).
    Return False if not all broken bonds can be reformed.
    Structures are run through mmlewis to get new bond orders and formal charges.


    :type st1: Structure
    :param st1: the first structure
    :type st2: Structure
    :param st2: the second structure
    :type fraction_different: float
    :param fraction_different: if a bond breaks then it is also required that
                               the bond distance increases by a factor of
                               fraction_different*d0 where d0 is the bond
                               distance before breaking
    :type reform_always: bool
    :param reform_always: If True, reform barely broken bonds even when doing so
                          will not make the two structures into conformers. If False,
                          only reform such bonds if doing so would render the two structures
                          conformers.
    """

    active_pairs = active_atom_pairs(st1, st2)

    are_conf = True
    if active_pairs:
        operations = collections.defaultdict(list)
        for i, j in active_pairs:
            dij_st1 = st1.measure(i, j)
            dij_st2 = st2.measure(i, j)

            # bonded structure will have a smaller distance
            if dij_st1 < dij_st2:
                frac = (dij_st2 - dij_st1) / dij_st1
                st_broken = st2
            else:
                frac = (dij_st1 - dij_st2) / dij_st2
                st_broken = st1

            # if change is within tolerance, add an operation to dict
            # else we declare the structures to not be conformers
            if frac <= fraction_different:
                operations[st_broken].append((i, j, 1))
            else:
                are_conf = False

        # re-bond broken bonds if all changes are within tolerance
        if are_conf or reform_always:
            for st in operations:
                v = operations[st]
                if v:
                    st.addBonds(v)
                    get_mmlewis_bonding(st)

    return are_conf


[docs]def molecule_lists_are_conformers(m1, m2, same_molecularity=True):
    """
    Compare two non-empty lists of structures and return True if matching pairs
    of conformers can be found.  The structures in m1 and m2 are assumed
    to be fully connected (molecules) if same_molecularity==True.

    Returns False if the number of molecules differs and same_molecularity==True.

    :type m1: list of structures
    :param m1: first list of molecules
    :type m2: list of structures
    :param m2: first list of molecules
    :type same_molecularity: bool
    :param same_molecularity: if True, require that the two lists be the
                              same length (i.e. number of molecules)
    :rtype: bool
    :return: whether or not the two lists of molecules are conformers
    """

    if same_molecularity and len(m1) != len(m2):
        return False
    assert len(m1) * len(m2) > 0

    all_st = []
    for st_list in [m1, m2]:
        st_all = st_list[0].copy()
        for st in st_list[1:]:
            st_all.extend(st)
        all_st.append(st_all)

    conformers = are_isomers(*all_st)
    if conformers:
        mapper = AutoTSAtomMapper(optimize_mapping=False, use_chirality=False)
        atlist = list(range(1, all_st[0].atom_total + 1))
        conformers = mapper.are_conformers(all_st[0], atlist, all_st[1], atlist)

    return conformers


[docs]def align_all_molecules(st1, st2, sort_rms=True):
    """
    Inspects whether or not all the molecules in reactant
    are conformers of those in product.
    If they are, each molecule in product is superimposed onto the
    corresponding reactant molecule.

    :type st1: schrodinger.structure.Structure instance
    :param st1: structure 1
    :type st2: schrodinger.structure.Structure instance
    :param st2: structure 2
    :type sort_rms: boolean
    :param sort_rms:  If True the structure in which the inter-molecule distances are
                      larger is superimposes onto the smaller, see sort_by_centroid_distance,
                      which does the sorting.

    """

    if are_conformers(st1, st2, use_lewis_structure=False):

        if sort_rms:
            destination, source = sort_by_centroid_distance([st1, st2])
        else:
            source = st2
            destination = st1

        for mol in st1.molecule:
            atlist = mol.getAtomIndices()
            rmsd.superimpose(destination,
                             atlist,
                             source,
                             atlist,
                             move_which=rmsd.MOLECULES)


[docs]def sort_by_centroid_distance(strs):
    """
    Given a list of structures sort the list in non-decreasing order using the
    rms distance between molecules in the structures. e.g. for two structures
    which are each water dimers, the water dimer with the smaller
    intermolecular distance would appear first in the list

    :type strs: list of schrodinger.structure.Structure instances
    :param strs: list of structures to be sorted
    :rtype: list of schrodinger.structure.Structure instances
    :return: sorted list
    """

    nstrs = len(strs)
    dists = [0.0 for i in range(nstrs)]

    for ii, st in enumerate(strs):
        nmol = len(st.molecule)
        if nmol > 1:

            mol_list = []
            for mol in st.molecule:
                mol_list.append(mol.extractStructure())

            for i in range(nmol):
                moli = mol_list[i]
                xi, yi, zi, junk = transform.get_centroid(moli)
                for j in range(i + 1, nmol):
                    molj = mol_list[j]
                    xj, yj, zj, junk = transform.get_centroid(molj)

                    dists[ii] += (xi - xj)**2.0 + (yi - yj)**2.0 + (zi -
                                                                    zj)**2.0

            dists[ii] /= float(nmol)

    ind = np.argsort(dists)

    out_strs = []
    for i in ind:
        out_strs.append(strs[i])

    return out_strs


[docs]def align_path_strs(path_strs):
    """
    Align the structures in a path.

    :type path_strs: list of structures
    :param path_strs: the structures along the path
    """

    if not path_strs:
        return

    atlist = [i + 1 for i in range(path_strs[0].atom_total)]
    # this imposes structure 1 on 0, 2 on 1, 3 on 2, etc.
    for st1, st2 in zip(path_strs[1:], path_strs[:-1]):
        rms = rmsd.superimpose(st2, atlist, st1, atlist, move_which=rmsd.CT)


[docs]def order_atoms(reactant, product, debug=False):
    """
    Renumber atoms in reactant and product in a consistent fashion.

    :type reactant: schrodinger.structure.Structure
    :param reactant: reactant structure
    :type product: schrodinger.structure.Structure
    :param product: reactant structure
    :rtype: tuple of two structures
    :return: renumbered reactant and product

    """

    r_copy = reactant.copy()
    p_copy = product.copy()

    # first try using chirality
    atlist = range(1, r_copy.atom_total + 1)
    mapper = AutoTSAtomMapper(optimize_mapping=True,
                              use_chirality=True,
                              debug=debug)

    try:
        r_new, p_new = mapper.reorder_structures(r_copy, atlist, p_copy, atlist)
    except ConformerError as e:
        mapper = AutoTSAtomMapper(optimize_mapping=True,
                                  use_chirality=False,
                                  debug=debug)

        r_new, p_new = mapper.reorder_structures(r_copy, atlist, p_copy, atlist)

    copy_autots_st_properties(reactant, r_new)
    copy_autots_st_properties(product, p_new)

    return r_new, p_new


[docs]class AutoTSAtomMapper(BaseAtomMapper):
    """
    Atom mapper used by AutoTS for reordering atoms of conformers.
    Uses element types and optionally chirality to equate atoms
    and all bonds are considered equivalent.
    Stereochemistry on atoms which are marked with the property
    %s are ignored when comparing stereochemistry.
    This property is used to mark the atoms in the reaction center.

    Usage example:
    mapper = AutoTSAtomMapper(optimize_mapping=True, use_chirality=False)
    st1, st2 = mapper.reorder_atoms(st1, range(1, st1.atom_total + 1), st2, range(st2.atom_total + 1))
    """ % autots_constants.IGNORE_ATOM_STEREO
    RMSD_THRESH = 1.0e-8

[docs]    def __init__(self, optimize_mapping, use_chirality, debug=False):
        """
        :type optimize_mapping: boolean
        :param optimize_mapping: if True search over all bijections to
                                 find the one with lowest score
        :type use_chirality: boolean
        :param use_chirality: if True, in addition to element type use
                              chirality (where defined) to equate atoms.
        """
        super(AutoTSAtomMapper, self).__init__(optimize_mapping, debug)
        self.use_chirality = use_chirality

[docs]    def initialize_atom_types(self, st, invert_stereocenters=False):
        """
        Initialize the atom types

        :type st: Structure
        :param st: structure containing atoms
        :type invert_stereocenters: boolean
        :param invert_stereocenters: whether or not R/S labels should be flipped
        """

        def atom_label(atom):
            """
            creates a label which is typically just the element name
            but also contains a special atom class integer if the atom
            has the property %s
            """ % autots_constants.AUTOTS_ATOM_CLASS
            atom_class = atom.property.get(
                str(autots_constants.AUTOTS_ATOM_CLASS), "")

            label = atom.element
            if atom_class:
                label += "-class=%s" % atom_class
            return label

        # uses element-chirality
        if self.use_chirality:
            #Discard Lewis data before determining chirality
            st_copy = discard_lewis_data(st)

            ch = chirality.get_chirality(st_copy)
            if invert_stereocenters:
                self.invert_chirality(ch)
            for at in st.atom:
                # have to get rid of these atom number chiralities here
                # since not using Lewis structure, E/Z isomerism doesn't apply
                if ch[at.index - 1] in ["ANR", "ANS", "E", "Z"]:
                    ch[at.index - 1] = None
                self.set_atom_type(
                    at,
                    atom_label(at) + "-chiral=%s" % ch[at.index - 1])
        else:
            for at in st.atom:
                self.set_atom_type(at, atom_label(at))

[docs]    def score_mapping(self, st1, st2, atset):
        """
        Score a mapping over the atoms listed in atlist.
        This returns a number of chirality mis-alignments and an rms
        mis-alignments are neglected for atoms which are marked with
        atom property %s

        :type st1: Structure
        :param st1: first structure
        :type st2: Structure
        :param st2: second structure
        :type atset: set of ints
        :param atset: set of atoms that have been mapped (indexes refer to both structures)
        :rtype: 5-tuple of (negative) int, (non-negative) int, (non-positive) int,
                (positive) int, and real
        :return: the negative of the number of active bonds, the number of misaligned
                 atom-numbering stereocenters, the negative of the number of active atoms,
                 the number of separate components, and the rmsd
        """ % autots_constants.IGNORE_ATOM_STEREO

        atlist = list(atset)
        # move a copy
        st1_working = st1.copy()
        st2_working = st2.copy()
        rms = rmsd.superimpose_substructure_molecules(st1_working, atlist,
                                                      st2_working, atlist)

        # according to cProfile this is the bottleneck
        ch1 = chirality.get_atom_numbering_chirality(st1_working)
        ch2 = chirality.get_atom_numbering_chirality(st2_working)
        misalignments = 0

        # only compare chirality for atoms which have all their
        # neighbors mapped
        atlist_neighbors_mapped = [
            i for i in atlist if all(
                [at.index in atset for at in st1_working.atom[i].bonded_atoms])
        ]

        for i in atlist_neighbors_mapped:
            ignore_stereo = st1.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False) or \
                            st2.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False)
            if (chirality.valid_chiral_comp(ch1[i - 1], ch2[i - 1]) and
                    ch1[i - 1] != ch2[i - 1]) and not ignore_stereo:
                misalignments += 1

        # In order to encourage larger n-membered TS's, we prefer solutions that
        # increase the number of active atoms (even at the expense of RMSD).
        # We include the negative of the number of active atoms to maximize this when
        # we take the minimum score later.
        # renumber.py has added this property to the active atoms for the purposes of
        # renumbering. It also handles removing the label when renumbering is complete.
        active_indices = set()
        for i in atlist:
            if st1.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False) or \
               st2.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False):
                active_indices.add(i)

        n_active_bonds, n_components = self.determine_active_bonds_info(
            st1, st2, atlist)

        score = (-n_active_bonds, misalignments, -len(active_indices),
                 n_components, rms)
        if self.debug:

            ignored_atoms = []
            for i in atlist_neighbors_mapped:
                if st1.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False) or \
                   st2.atom[i].property.get(autots_constants.IGNORE_ATOM_STEREO, False):
                    ignored_atoms.append(i)

            print("ignoring stereo for the following atoms", ignored_atoms)

            st1_working.append("scoring.mae")
            st2_working.append("scoring.mae")
            file_logger.register_file("scoring.mae")

            print("score", score)
            print("misalignments:")
            for i in atlist_neighbors_mapped:
                if ch1[i - 1] != ch2[i - 1]:
                    print(i, ch1[i - 1], ch2[i - 1])

        return score

[docs]    def score_is_equivalent(self, score1, score2):
        """
        Here we declare 2 scores equivalent if the same number of chirality
        mismatches are present and the rmsd difference is within RMSD_THRESH.
        This resolves machine dependent descrepancies in the chosen map.

        :type score1: tuple
        :param score1: the first score (chirality mismatches, active atoms,
            rms)

        :type score2: tuple
        :param score2: the first score (chirality mismatches, active atoms,
            rms)

        :return: boolean indicating if the score is the same or different
        """

        same_int_scores = score1[:4] == score2[:4]
        same_rms = abs(score1[4] - score2[4]) < self.RMSD_THRESH
        return same_int_scores and same_rms

[docs]    def determine_active_bonds_info(self, st1, st2, atlist):
        """
        Determine the number of active bonds after numbering and the number of components.

        It is possible for the numbering code to "deactivate" an active
        bond and such numbering should be considered invalid. The way this
        might happen is for the numbering to lead to two active bonds to be numbered
        equivalently. Say you have a reactant water with an O-H active and a product
        water with an O-H active (for example, in a water wire). The numbering code
        could decide to number both of these O's the same and both H's the same. Thus,
        the code thinks it has both O-H's active but, in reality, now neither are because
        the equivalently numbered atoms have bonds in both reactant and product.

        The active bonds have been stored as a binary vector (i.e. an int). An active bond-
        destroying numbering will have the same two atom numbers as the endpoint of more than
        one active bond.

        We define the number of 'components' as the number of disjoint sets of molecules that
        are involved in active bonds. That is, if all active bonds in reactants and products
        were drawn in one structure, how many "molecules" would there be? For example, it is
        possible to number a set of bonds in the reaction A+B+C+D -> E+F, where A+B->E and C+D->F
        or one in which atoms from all 4 reactant molecules appear among all 2 of the products.
        The former case is a 2-component reaction (consisting of (A, B, E) and (C, D, F)) and
        the latter is a 1-component reaction (A, B, C, D, E ,F). We assume that the user put in
        an irreducible reaction, and hence fewer component renumbered solutions are to be
        preferred.

        :type st1: Structure
        :param st1: first structure
        :type st2: Structure
        :param st2: second structure
        :type atlist: list of ints
        :param atlist: list of atoms that have been mapped (indexes refer to both structures)
        :rtype: (int, int)
        :return: number of active bonds from this numbering, number of components
        """

        def get_set_bits(bond_bitstring):
            """
            Get the bits that are set in the int listing the active bonds

            We check if the last bit is set (by anding with 1) and, if so,
            append its position to a list. We then bit-shift the entire int to
            the right (which deletes the bit just checked and moves the
            next-to-last bit to the last bit). We continue checking and
            shifting until the bitstring is exhausted (i.e. there are no bits
            set anywhere, which happens when the bitstring is 0)
            """
            output = []
            counter = 0
            while bond_bitstring != 0:
                if bond_bitstring & 1:
                    output.append(counter)
                bond_bitstring = bond_bitstring >> 1
                counter += 1
            return output

        a_bond_list = collections.defaultdict(list)
        for at in atlist:
            for st in [st1, st2]:
                bond_bitstring = st.atom[at].property.get(
                    autots_constants.BOND_BITSTRING, 0)
                for bond in get_set_bits(bond_bitstring):
                    a_bond_list[bond].append(at)

        bonds = set()
        # Make sure all active bonds found are valid
        for bond in a_bond_list.values():
            if len(bond) == 2:
                bonds.add(tuple(sorted(bond)))

        st_all = st1.copy()
        st_all.addBonds([(i, j, 1) for i, j in bonds])

        return len(bonds), st_all.mol_total


[docs]class AutoTSTemplateAtomMapper(AutoTSAtomMapper):
    """
    Atom mapper used by AutoTS's template code for reordering atoms
    to align template and input.
    Uses element types to equate atoms and all bonds are considered equivalent.
    Stereochemistry on atoms which are marked with the property
    %s are ignored when comparing stereochemistry.

    This differs from the AutoTSAtomMapper in that it simultaneously renumbers
    4 structures instead of 2, assuming two pairs of those structures are already
    consistently numbered. Moreover, the return value of this class is the optimal map
    that accomplishes the renumbering, as opposed to the renumbered Structures themselves.

    Usage example:
    mapper = AutoTSTemplateAtomMapper(optimize_mapping=True, use_chirality=False)
    map = mapper.choose_template_map(reactant, product, input_indexes, r_template, p_template, template_indexes)
    """ % autots_constants.IGNORE_ATOM_STEREO
    RMSD_THRESH = 1.0e-8

[docs]    def initialize_atom_types(self, st, active_ats, invert_stereocenters=False):
        """
        Initialize the atom types

        :type st: Structure
        :param st: structure containing atoms
        :type active_ats: list of ints
        :param active_ats: atom indices of the reaction center
        :type invert_stereocenters: boolean
        :param invert_stereocenters: whether or not R/S labels should be flipped
        """
        for at in st.atom:
            if autots_constants.AUTOTS_ATOM_CLASS in at.property:
                del at.property[autots_constants.AUTOTS_ATOM_CLASS]

        super().initialize_atom_types(st, invert_stereocenters)
        for at in active_ats:
            at_type = self.get_atom_type(st.atom[at])
            at_type += ACTIVE_ATOM
            self.set_atom_type(st.atom[at], at_type)

    def _order_atoms(self, st1, atset1, st2, atset2, st3, st4, input_rc,
                     template_rc):
        """
        Order atoms by first recursively removing
        topologically degenerate terminal atoms

        :type st1: Structure
        :param st1: the first structure
        :type atset1: set of ints
        :param atset1: set of atom indexes defining the first substructure
        :type st2: Structure
        :param st2: the second structure
        :type atset2: set of ints
        :param atset2: set of atom indexes defining the second substructure
        :type st3: Structure
        :param st3: the third structure, already assumed to be numbered
                    consistently with st1
        :type st4: Structure
        :param st4: the fourth structure, already assumed to be numbered
                    consistently with st2
        :type input_rc: set of ints
        :param input_rc: reaction center atom indices of st1, st3
        :type template_rc: set of ints
        :param template_rc: reaction center atom indices of st2, st4
        :return: the four structures with structure 2, 4 having had atoms reordered
                 and the overall map which accomplished this
        """

        st1_renorm, k1, e1 = self._eliminate_degenerate_terminal_atoms(
            st1, atset1)
        st2_renorm, k2, e2 = self._eliminate_degenerate_terminal_atoms(
            st2, atset2)
        st3_renorm, k3, e3 = self._eliminate_degenerate_terminal_atoms(
            st3, atset1)
        st4_renorm, k4, e4 = self._eliminate_degenerate_terminal_atoms(
            st4, atset2)

        # check elimination for similarity or if we tried to eliminate the rxn center
        if not (self.isomeric_atom_sets(st1, e1, st2, e2) and
                self.isomeric_atom_sets(st2, e2, st3, e3) and
                self.isomeric_atom_sets(st3, e3, st4, e4)) or len(
                    e1.intersection(input_rc)) > 0 or len(
                        e2.intersection(template_rc)) > 0:
            e1.clear()
            e2.clear()
            e3.clear()
            e4.clear()

        # if we've eliminated any terminal atoms recurse
        if e1 and e2 and e3 and e4:
            st1_reord, st2_reord, st3_reord, st4_reord, overall_map = self._order_atoms(
                st1_renorm, k1, st2_renorm, k2, st3_renorm, st4_renorm,
                input_rc, template_rc)
            # order eliminated atoms
            # because st1-st3 (r and p) and st2-st4 (r_template and p_template) are consistently numbered
            # one actually need only consider st1 and st2 and then apply the same map selection to the
            # other two st.
            st1_reord, st2_reord, st3_reord, st4_reord, app_map = self._append_terminal_atoms(
                st1_reord, e1, st2_reord, e2, st3_reord, st4_reord, k1)
            overall_map.update(app_map)
        # order core
        else:
            st1_reord, st2_reord, st3_reord, st4_reord, overall_map = self._order_core_atoms(
                st1, atset1, st2, atset2, st3, st4, input_rc, template_rc)

        return st1_reord, st2_reord, st3_reord, st4_reord, overall_map

    def _append_terminal_atoms(self, st1, term1, st2, term2, st3, st4, core):
        """
        Map the terminal atoms in the list term1 and term2.
        The atom indexes in the list core should be consistently
        ordered between st1 and st2.

        :type st1: Structure
        :param st1: the first structure
        :type term1: set of ints
        :param term1: set of terminal atom indexes in st1
        :type st2: Structure
        :param st2: the second structure
        :type term2: set of ints
        :param term2: set of terminal atom indexes in st2
        :type st3: Structure
        :param st3: the third structure, already assumed to be numbered
                    consistently with st1
        :type st4: Structure
        :param st4: the fourth structure, already assumed to be numbered
                    consistently with st2
        :type core: set of atoms in core structure which are already
                    numbered consistently
        :return: four new structures, map
        """

        # keeps track of which atoms have been mapped
        overall_map = dict()
        have_mapped = core.copy()
        for icore in core:

            types1 = self._group_neighbors(st1.atom[icore], term1)
            types2 = self._group_neighbors(st2.atom[icore], term2)

            # check that name of groups and lengths are the same
            group_cnts1 = {grp: len(types1[grp]) for grp in types1}
            group_cnts2 = {grp: len(types2[grp]) for grp in types2}
            if group_cnts1 != group_cnts2:
                raise ConformerError(
                    "Groups are not the same when appending terminal atoms")

            # each group has a list of mappings
            group_mappings = collections.defaultdict(list)
            for group in types1:
                for p in itertools.permutations(types2[group],
                                                len(types2[group])):
                    group_mappings[group].append(list(zip(types1[group], p)))

            # the set of atoms getting mapped
            mapped_atoms = set()
            for lst in types1.values():
                mapped_atoms.update(lst)

            # cartesian product of all groups to get
            # all mappings of this atoms neighbors
            best_score = None
            best_maps = []
            for mapping in itertools.product(*group_mappings.values()):

                # get a mapping of terminal atoms attached to this core
                full_list = []
                for group_mapping in mapping:
                    full_list.extend(group_mapping)

                # set initial map to None
                full_map = [None for i in range(st2.atom_total)]
                # map everything that has been mapped
                for j in have_mapped:
                    full_map[j - 1] = j

                # map this grouping of atoms

                for group_mapping in mapping:
                    for i, j in group_mapping:
                        full_map[i - 1] = j
                # fill in Nones
                _fill_map(full_map)

                # RMS is computed over core atom and added terminals
                # whereas chirality need only be computed for the new core atom
                # this is because chirality is determined by core + terminal
                st2_reord = build.reorder_atoms(st2, full_map)
                st4_reord = build.reorder_atoms(st4, full_map)
                all_mapped = have_mapped.union(mapped_atoms)
                score_r = self.score_mapping(st1, st2_reord, all_mapped)
                score_p = self.score_mapping(st3, st4_reord, all_mapped)

                score = tuple(map(sum, zip(score_r, score_p)))

                map_score = MapScore(score, full_map)
                self._update_map_list(best_maps, map_score)

                if not self.optimize_mapping:
                    break

            ## now pick the best map. If there are ties, choose (entirely arbitrarily)
            ## the one that preserves the relative ordering of the atoms being reordered
            ## as best as possible
            if best_maps:
                best_score, best_mapping = min(best_maps,
                                               key=lambda bij: bij.atom_map)

            # save the overall map
            for at in mapped_atoms:
                overall_map[at] = st2.atom[best_mapping[at - 1]].property[
                    self.MAPPER_INDEX]

            if self.debug:
                if best_maps:
                    print("Best app maps")
                    for mapping in best_maps:
                        print(str(mapping.score))
                print("overall map in append", overall_map)

            st2 = build.reorder_atoms(st2, best_mapping)
            st4 = build.reorder_atoms(st4, best_mapping)

            # we just mapped terminal atoms
            have_mapped.update(mapped_atoms)

        return st1, st2, st3, st4, overall_map

    def _order_core_atoms(self, st1, atset1, st2, atset2, st3, st4, input_rc,
                          template_rc):
        """
        re-order the atoms atset2 in st2/st4 to be consistent
        with atset1 in st1/st3.  Graphs made from the atom
        indexes listed in atset1 and atset2 must be isomorphic.

        :type st1: Structure
        :param st1: the first structure
        :type atset1: set of ints
        :param atset1: set of atom indexes defining the first substructure
        :type st2: Structure
        :param st2: the second structure
        :type atset2: list of ints
        :param atset2: list of atom indexes defining the second substructure
        :type st3: Structure
        :param st3: the third structure, already assumed to be numbered
                    consistently with st1
        :type st4: Structure
        :param st4: the fourth structure, already assumed to be numbered
                    consistently with st2
        :type input_rc: set of ints
        :param input_rc: reaction center atom indices of st1, st3
        :type template_rc: set of ints
        :param template_rc: reaction center atom indices of st2, st4
        :return: the four structures with structure 2, 4 having had atoms reordered
                 and the overall map which accomplished this
        """

        if len(atset1) != len(atset2):
            raise ConformerError("length of atom lists in core region differ")

        r_graph = self.st_to_graph(st1, atset1)
        r_template_graph = self.st_to_graph(st2, atset2)
        p_graph = self.st_to_graph(st3, atset1)
        p_template_graph = self.st_to_graph(st4, atset2)

        r_matcher = isomorphism.GraphMatcher(r_graph, r_template_graph,
                                             self.comparator)
        p_matcher = isomorphism.GraphMatcher(p_graph, p_template_graph,
                                             self.comparator)

        best_maps = []
        overall_map = dict()
        for trial_map in _template_map_iter(r_matcher, p_matcher, input_rc,
                                            template_rc):
            atom_map = [None for i in range(st2.atom_total)]
            for k, v in trial_map.items():
                atom_map[k - 1] = v
            _fill_map(atom_map)

            r_t_reord = build.reorder_atoms(st2, atom_map)
            p_t_reord = build.reorder_atoms(st4, atom_map)

            score_r = self.score_mapping(st1, r_t_reord, atset1)
            score_p = self.score_mapping(st3, p_t_reord, atset1)
            score = tuple(map(sum, zip(score_r, score_p)))
            map_score = MapScore(score, atom_map)

            self._update_map_list(best_maps, map_score)

        if best_maps:
            best_score, best_mapping = min(best_maps,
                                           key=lambda bij: bij.atom_map)
        else:
            raise TemplateAtomMapFailure

        r_t_reord = build.reorder_atoms(st2, best_mapping)
        p_t_reord = build.reorder_atoms(st4, best_mapping)
        for i in atset1:
            overall_map[i] = best_mapping[i - 1]

        if self.debug:
            print("best_score", best_score)
            print(overall_map)

        return st1, r_t_reord, st3, p_t_reord, overall_map

[docs]    def choose_template_map(self, reactant, product, input_indexes, r_template,
                            p_template, template_indexes):
        """
        Determine the optimal map from the input reactant and product structures
        to the template reactant and product structures.

        :type reactant: Structure instance
        :param reactant: input reactant structure
        :type product: Structure instance
        :param product: input product structure
        :type input_indexes: RxnIndxDecomp instance
        :param input_indexes: breakdown of input atom indexes into core,
                              reaction center
        :type r_template: Structure instance
        :param r_template: template reactant structure
        :type p_template: Structure instance
        :param p_template: template product structure
        :type template_indexes: RxnIndxDecomp instance
        :param template_indexes: breakdown of template atom indexes into core,
                                 reaction center
        :rtype: dict
        :return: mapping from input number to template numbering
        """
        r_working = reactant.copy()
        p_working = product.copy()
        r_t_working = r_template.copy()
        p_t_working = p_template.copy()
        for sts, indexes in zip(
            [[r_working, p_working], [r_t_working, p_t_working]],
            [input_indexes, template_indexes]):
            for st in sts:
                self.initialize_atom_types(st, indexes.rxn_center)
                for at in indexes.rxn_center:
                    st.atom[at].property[
                        autots_constants.IGNORE_ATOM_STEREO] = True
                for at in st.atom:
                    at.property[self.MAPPER_INDEX] = at.index

        # Because our scoring works by reordering the structures, the actual map
        # determination must involve the system with more atoms being mapped onto the
        # system with fewer atoms (since you can't reorder the system with fewer atoms to
        # have atom indices greater than its number of atoms.
        # We simply reverse the map if need be to ensure a map from input to template
        if r_working.atom_total <= r_t_working.atom_total:
            r_working, r_t_working, p_working, p_t_working, overall_map = self._order_atoms(
                r_working, set(input_indexes.core), r_t_working,
                set(template_indexes.core), p_working, p_t_working,
                set(input_indexes.rxn_center), set(template_indexes.rxn_center))
            if self.debug:
                r_t_working.write("rt_reord.mae")
                file_logger.register_file("rt_reord.mae")
                p_t_working.write("pt_reord.mae")
                file_logger.register_file("pt_reord.mae")
        else:
            r_t_working, r_working, p_t_working, p_working, reverse_map = self._order_atoms(
                r_t_working, set(template_indexes.core), r_working,
                set(input_indexes.core), p_t_working, p_working,
                set(template_indexes.rxn_center), set(input_indexes.rxn_center))
            overall_map = {v: k for k, v in reverse_map.items()}
            if self.debug:
                r_working.write("rt_reord_rev.mae")
                file_logger.register_file("rt_reord_rev.mae")
                p_working.write("pt_reord_rev.mae")
                file_logger.register_file("pt_reord_rev.mae")

        if self.debug:
            print("final overall_map", overall_map)

        return overall_map


def _template_map_iter(r_matcher, p_matcher, input_rc, template_rc):
    """
    Iterator over valid template isomorphism atom maps.

    Valid atom maps must exist between both reactants and product and map
    the reaction center to the reaction center.
    Since template r and p are consistently numbered and input r and p are consistently
    numbered, only maps that appear in both lists are good maps to use.
    That is, symmetry breaking may only occur on one side, a map that appears in one
    may not take this symmetry breaking into account.

    :type r_matcher: GraphMatcher instance
    :param r_matcher: iterator over isomorphic mappings of the input reactant to
                      template reactant
    :type p_matcher: GraphMatcher instance
    :param p_matcher: iterator over isomorphic mappings of the input product to
                      template product
    :type input_rc: list of ints
    :param input_rc: atom indices of input reaction center
    :type template_rc: list of ints
    :param template_rc: atom indices of template reaction center
    """

    def image(mapping, domain):
        """
        return image of a map given domain
        """
        return set(v for k, v in mapping.items() if k in domain)

    p_maps = set(
        frozenset(isomorph.items())
        for isomorph in p_matcher.isomorphisms_iter()
        if image(isomorph, input_rc) == set(template_rc))
    for isomorph in r_matcher.isomorphisms_iter():
        if frozenset(isomorph.items()) in p_maps:
            yield isomorph