Source code for schrodinger.application.pathfinder.route

"""
This module contains Node classes for representing retrosynthetic analyses and
synthetic routes, as well as functions for reading and writing route files.
"""

import copy
import fnmatch
import itertools
import json
import os

import voluptuous
from rdkit import Chem
from voluptuous import All
from voluptuous import Any
from voluptuous import Range
from voluptuous import Required
from voluptuous import Schema

from schrodinger.application.pathfinder import reaction
from schrodinger.job import jobcontrol

# Schemas for validating JSON files.
reagent_node_schema = Schema(
    Any(
        {
            Required(Any('file', 'smiles', 'reagent')): str,
            Any('steps', 'depth'): int,
            'frozen_atoms': [int],
        },
        {
            Required('smiles_list'): [str],
            Any('file', 'reagent'): str,
            Any('steps', 'depth'): int,
            'frozen_atoms': [int],
            'innerfrozen_atoms': [int],
        },
    ))

# hack to allow recursive spec below
_reaction_node_schema = lambda v: reaction_node_schema(v)

reaction_node_schema = Schema({
    'reaction_name': str,
    'smiles': str,
    'ncycles': All(int, Range(min=1)),
    'fallback_reagent': int,
    'precursors': [Any(_reaction_node_schema, reagent_node_schema)],
    Any('steps', 'depth'): int,
})

route_file_schema = Schema(Any(reaction_node_schema, reagent_node_schema))

# This is a simplified schema because the values of 'route' and 'reactions' will
# be validated later anyway using their own schemata.
self_contained_route_schema = Schema({
    Required('route'): dict,
    Required('reactions'): dict,
})

# Atom properties to label frozen atoms. Frozen and innerfrozen atoms must be
# present in the product of a synthesis, and innerfrozen atoms are not allowed
# to form new bonds.
FROZEN_PROP = 'b_reaction_frozen'
INNERFROZEN_PROP = 'b_reaction_innerfrozen'
TARGET_ATOM_IDX_PROP = 'i_reaction_target_idx'

# Hidden property to hold the original CXSMILES of a reactant.
CXSMILES_PROP = '_cxsmiles'

# Parameters used for isotope labeling of frozen atoms.
INNERFROZEN_OFFSET = 10000
FROZEN_FACTOR = 1000

SMILES = "SMILES"


[docs]class ReactionInstance(object):
    """
    A ReactionInstance is the application of a Reaction to a list of
    reagents/precursors. For example, "Amide synthesis" is a Reaction; but
    "Amide synthesis from acetic acid and ethylamine" is a ReactionInstance.
    """

[docs]    def __init__(self, reaction, precursors, broken_bonds=frozenset()):  # noqa
        self.reaction = reaction
        self.precursors = precursors
        self.broken_bonds = broken_bonds

    @property
    def name(self):
        """
        The name of the reaction.
        """
        return self.reaction.name


[docs]class Node(object):
    """
    Base class for a node in a synthetic route or a retrosynthetic tree.
    A node is associated with a molecule and with a reagent class (both of
    which may be None). It is also associated with zero or more reaction
    instances, but the base class does not implement an API to add reaction
    instances because each subclass has a different policy concerning the
    number of reaction instances allowed.
    """

[docs]    def __init__(self, mol=None, reagent_class=None):
        """
        :type mol: Mol or NoneType
        :type reagent_class: str or NoneType
        """
        self.mol = mol
        self.reagent_class = reagent_class
        self.reaction_instances = []
        self.display_name = ''

[docs]    def treeAsString(self,
                     products=True,
                     starting_materials=True,
                     indexes=True):
        """
        Return a recursive string representation of the tree (unlike
        __str__, which is a short representation of the current node).
        The reaction names are always shown; starting materials and products are
        optional.

        :param products: include reaction product SMILES
        :type products: bool

        :param starting_materials: include starting material SMILES
        :type starting_materials: bool

        :rtype: str
        """
        counter = itertools.count(1) if indexes else None
        return '\n'.join(
            self._asStringList(products, starting_materials, counter, 0))

    def _asStringList(self, products, starting_materials, counter, depth):
        lines = []
        if products:
            indent = " " * 8 * depth
            half_indent = " " * 4
        else:
            indent = " " * 4 * depth
            half_indent = ""
        if products or (not self.reaction_instances and starting_materials):
            if counter and not self.reaction_instances:
                count = "%d: " % next(counter)
            else:
                count = ''
            lines.append("%s%s%s" % (indent, count, self))
        for rxn_inst in self.reaction_instances:
            lines.append("%s%s%s" % (indent, half_indent, rxn_inst.reaction))
            for precursor in rxn_inst.precursors:
                lines.extend(
                    precursor._asStringList(products, starting_materials,
                                            counter, depth + 1))
        return lines

[docs]    def getReactionSet(self):
        """
        Return the set of reactions used by the route.
        """
        rxn_set = set()
        for ri in self.reaction_instances:
            rxn_set.add(ri.reaction)
            for precursor in ri.precursors:
                rxn_set |= precursor.getReactionSet()
        return rxn_set


[docs]class RetroSynthesisNode(Node):
    """
    A node in a retrosynthetic analysis tree. A node may have one or more
    reaction instances, which represent the ways of synthesizing the node
    in a single step.
    """

[docs]    def __init__(self, *a, frozen=False, **d):
        """
        :param frozen: The molecule associated with the current node has
            frozen atoms. When generating a route, ReagentNode's derived from
            the current node won't have a reagent class, so they won't be
            enumerated by default.
        :type frozen: bool
        """
        super().__init__(*a, **d)
        self.frozen = frozen

    def __str__(self):
        return Chem.MolToSmiles(self.mol)

[docs]    def addReactionInstance(self,
                            reaction,
                            precursors,
                            broken_bonds=frozenset()):  # noqa
        """
        Add a reaction instance to the current node. This represents a one-step
        synthesis of the node from a list of precursor nodes.

        :type reaction: Reaction
        :type precursors: list of Node-derived objects

        :param broken_bonds: set of bonds broken by this reaction instance,
            where each bond is a tuple of two ints (sorted atom indexes in the
            original target molecule).
        :type broken_bonds: {(int, int)}
        """
        self.reaction_instances.append(
            ReactionInstance(reaction, precursors, broken_bonds))

    def _getRoutes(self, include_zero=False, bond_reactions=None):
        """
        Enumerate all the possible routes to synthesize the target. This is a
        lower-level version of getRoutes() which doesn't do any filtering or
        deduplication.

        :param include_zero: if True, the first route on the list will be
            the "zero-step route" where the target is the starting material.
        :type include_zero: bool

        :param bond_reactions: dict specifying which reactions are allowed to
                break certain bonds. Keys are tuples of two ints (sorted atom
                indexes); values are sets of reaction names.
        :param bond_reactions: {(int, int): set(str)}

        :rtype: list of RouteNode
        """

        def broken_bonds_ok(rxn_inst):
            for bond in rxn_inst.broken_bonds & bond_reactions.keys():
                if rxn_inst.name not in bond_reactions[bond]:
                    return False
            return True

        if include_zero and (self.frozen or self.reagent_class):
            reagent_class = None if self.frozen else self.reagent_class
            yield ReagentNode(mol=self.mol, reagent_class=reagent_class)
        for rxn_inst in self.reaction_instances:
            if bond_reactions and not broken_bonds_ok(rxn_inst):
                # We do some broken bond filtering here and not as
                # post-processing in getRoutes as an early pruning optimization.
                continue
            precursor_routes = [
                precursor._getRoutes(True, bond_reactions)
                for precursor in rxn_inst.precursors
            ]
            for precursors in lazy_product(*precursor_routes):
                route = RouteNode(self.mol, self.reagent_class)
                route.setReactionInstance(rxn_inst.reaction, precursors,
                                          rxn_inst.broken_bonds)
                yield route

[docs]    def getRoutes(self,
                  require=None,
                  seen=None,
                  full_dedup=False,
                  bond_reactions=None):
        """
        Generate the routes from a retrosynthesis tree.

        :param require: reaction tags or names to require in each route
        :type require: set of str

        :param seen: set of routes that have already been seen and shouldn't be
                generated again. Modified in place. This can be used to prevent
                duplicates when generating routes from multiple retrosynthesis
                trees.
        :type seen: set of str

        :param full_dedup: "full deduplication": do not generate routes consisting
                of the same reaction path but with different molecules
        :type full_dedup: bool

        :param bond_reactions: dict specifying which reactions are allowed to
                break certain bonds. Keys are tuples of two ints (sorted atom
                indexes); values are sets of reaction names.
                Only routes which break all the bonds specified, using one of the
                reactions specified for each bond, are generated.
        :param bond_reactions: {(int, int):set(str)}

        :return: generator of routes (RouteNode)
        :rtype: generator
        """
        if seen is None:
            seen = set()
        for route in self._getRoutes(bond_reactions=bond_reactions):
            # When deduping, we omit molecules from the tree we use as a key, and
            # only use the reactions.
            key = route.treeAsString(products=not full_dedup,
                                     starting_materials=not full_dedup)
            if require and not route.checkReactions(require):
                continue
            if key in seen:
                continue
            if bond_reactions:
                # Make sure that all specified bonds were actually broken.
                if not (bond_reactions.keys() <= route.brokenBonds().keys()):
                    continue
            seen.add(key)
            yield route


[docs]class RouteNode(Node):
    """
    A node in a synthetic route. Similar to RetroSynthesisNode, except that it
    can have one reaction instance at most. Also, RouteNode provides additional
    methods that only make sense for a synthetic route.
    """

[docs]    def __init__(self,
                 mol=None,
                 reagent_class=None,
                 ncycles=None,
                 fallback_reagent=None,
                 include_smiles=True):
        """
        :param mol: molecule associated with this node, if any
        :type mol: Mol or NoneType

        :param reagent_class: reagent class associated with this node, if any
                              (only really relevant for starting materials)
        :type reagent_class: str or NoneType

        :param ncycles: used by the Synthesizer: try to apply the reaction for
                        this node up to ncycles times
        :type ncycles: int or NoneType

        :param fallback_reagent: used by the Synthesizer: when a reaction fails,
                                 the reagent identified by this 1-based index
                                 becomes the product
        :type fallback_reagent: int or NoneType

        :param include_smiles: whether to include the SMILES string for each
                               node.
        :type include_smiles: bool
        """
        super().__init__(mol, reagent_class)
        self.ncycles = ncycles
        self.fallback_reagent = fallback_reagent
        self.include_smiles = include_smiles
        self.target_atom_index_map = {}

    def __str__(self):
        if self.mol:
            return Chem.MolToSmiles(self.mol)
        else:
            return "None"

    @property
    def precursors(self):
        """
        The list of precursors of this node (may be empty).
        """
        if self.isStartingMaterial():
            return []
        else:
            return self.reaction_instance.precursors

    @property
    def reaction_instance(self):
        """
        The reaction instance associated with this node. If the node has no
        reaction instance, raises KeyError.
        """
        return self.reaction_instances[0]

    @property
    def reaction(self):
        """
        The reaction associated with this node. If the node has no
        reaction instance, raises KeyError.
        """
        return self.reaction_instance.reaction

[docs]    def isStartingMaterial(self):
        """
        Return True if the node represents a starting material (i.e., has no
        reaction instance).
        """
        return not self.reaction_instances

[docs]    def getStartingNodes(self):
        """
        Search recursively and return the Node objects for all the starting
        materials in the route.

        :rtype: list of Node-derived objects
        """
        if self.isStartingMaterial():
            return [self]
        else:
            return sum((p.getStartingNodes() for p in self.precursors), [])

[docs]    def updateTargetIndexMap(self, target_index_map):
        """
        Store target frozen atom indices for alignment

        :param target_index_map: map of reagent_index, atom_index pairs to
            target_indices
        :type target_index_map: {(reagent_index, atom_index): target_index}
        """
        self.target_atom_index_map.update(target_index_map)

[docs]    def setReactionInstance(self,
                            reaction,
                            precursors,
                            broken_bonds=frozenset()):  # noqa
        """
        Set the reaction instance to the current node. This represents a
        one-step synthesis of the node from a list of precursor nodes.

        :type reaction: Reaction
        :type precursors: list of Node-derived objects

        :param broken_bonds: set of bonds broken by this reaction instance,
            where each bond is a tuple of two ints (sorted atom indexes in the
            original target molecule).
        :type broken_bonds: {(int, int)}
        """
        numtargets = reaction.rxn.GetNumReactantTemplates()
        if numtargets != 1:
            raise ValueError(f"Bad reaction '{reaction}'. "
                             "Routes only support one-product reactions.")
        self.reaction_instances = [
            ReactionInstance(reaction, precursors, broken_bonds)
        ]

[docs]    def steps(self):
        """
        Return the total number of steps in the route. For example, the
        following synthesis has 3 steps but depth 2.

            Target => A + B
            A => AA
            B => BB

        :rtype: int
        """
        steps = 0
        for rxn_inst in self.reaction_instances:
            steps += 1
            for precursor in self.precursors:
                steps += precursor.steps()
        return steps

[docs]    def depth(self, _depth=0):
        """
        Return the maximum depth of the route. See example under steps().

        :rtype: int
        """
        if self.isStartingMaterial():
            return _depth
        else:
            return max(p.depth(_depth + 1) for p in self.precursors)

[docs]    def brokenBonds(self, broken_bonds=None):
        """
        Return a dict describing all the bonds broken by the route (going
        recursively down to the starting materials).

        :return: dict where each key is a bond (tuple of two sorted int atoms
            indexes) and each value is a reaction name.
        :rtype:  {(int, int): str}
        """
        if broken_bonds is None:
            broken_bonds = {}
        for rxn_inst in self.reaction_instances:
            for bond in rxn_inst.broken_bonds:
                broken_bonds[bond] = rxn_inst.name
        for precursor in self.precursors:
            precursor.brokenBonds(broken_bonds)
        return broken_bonds

[docs]    def write(self, filename, self_contained=False):
        """
        Write a route file.

        :param filename: File to write.
        :type filename: str

        :param self_contained: Write a "self-contained route file", which
            includes the reactions used by the route?
        :type self_contained: bool
        """
        with open(filename, 'w') as fh:
            tree = self.getTreeData(self_contained=self_contained)
            json.dump(tree, fh, indent=2)
            print(file=fh)

[docs]    def getTreeData(self, self_contained=False):
        """
        Return a simple data structure, suitable for dumping to JSON,
        representing the route. See write() for more details.

        Example return value::

            {
              "reaction_name": "alkylation-1",
              "smiles": "CNCC=O",
              "precursors": [
                {
                  "smiles_list": [
                    "O=CCBr"
                  ],
                  "reagent": "halides-primsec"
                },
                {
                  "reaction_name": "curtius-3",
                  "smiles": "CN",
                  "precursors": [
                    {
                      "smiles_list": [
                        "CC(=O)O"
                      ],
                      "reagent": "carboxylates"
                    }
                  ]
                }
              ],
              "steps": 2,
              "depth": 2
            }

        :param self_contained: Return the data for a "self-contained
            route file", which includes the reactions used by the route?
        :type self_contained: bool

        :return: contents of route file
        :rtype: dict
        """
        tree = self._getTreeData()
        tree['steps'] = self.steps()
        tree['depth'] = self.depth()
        if self_contained:
            used_rxns = {r.name: r.asDict() for r in self.getReactionSet()}
            tree = {'route': tree, 'reactions': used_rxns}
        return tree

    def _getTreeData(self):
        node = self._getNodeData()
        if not self.isStartingMaterial():
            node['precursors'] = [p._getTreeData() for p in self.precursors]
        return node

    def _getNodeData(self):
        node = {'reaction_name': self.reaction_instance.name}
        if self.include_smiles and self.mol:
            node['smiles'] = str(self)
        if self.fallback_reagent is not None:
            node['fallback_reagent'] = self.fallback_reagent
        if self.ncycles is not None:
            node['ncycles'] = self.ncycles
        return node

[docs]    def getSimplifiedTreeData(self, _counter=None):
        """
        Return a data structure suitable for dumping into JSON. This is
        similar to getTreeData but more concise, and is meant for
        a one-line representation, ruughly 100 characters long.

        The same example given for getTreeData would look like this:

            {"alkylation-1": ["O=C([O-])CBr", 2]}

        this format is clearly more limited: there is no reagent class
        information and each starting material must be either a single SMILES
        (i.e., no lists supported) or an integer (meaning a reagent source
        index). The format is also harder to expand in a backward-compatible
        way. Its purpose is not to be an input for an enumeration job, but just
        to provide a small represantation of the route that can be stored as a
        structure property so a user can figure out how a compound was made.

        :return: simplified route representation
        :rtype: dict
        """
        if _counter is None:
            _counter = itertools.count(1)
        precursors = []
        tree = {self.reaction_instance.name: precursors}
        for prec in self.precursors:
            if prec.isStartingMaterial():
                idx = next(_counter)
                if prec.reagent_class:
                    precursors.append(idx)
                else:
                    precursors.append(prec.smiles_list[0])
            else:
                precursors.append(prec.getSimplifiedTreeData(_counter))
        return tree

[docs]    def getOneLineRepresentation(self):
        """
        Return a one-line string with the simplified tree representation
        generated by getSimplifiedTreeData().

        :return: simplified route representation
        :rtype: str
        """
        tree = self.getSimplifiedTreeData()
        return json.dumps(tree)

[docs]    def checkReactions(self, reqs):
        r"""
        Check that the route meets all requirements. Every element of 'reqs'
        must match a tag or reaction name for at least one of the reactions
        used by the route. Tag matching is exact; name matching uses shell-like
        globbing (\*, ?, []).

        :type reqs: set of str
        :return: True if route meets requirements.
        :rtype: bool
        """
        rxn_set = self.getReactionSet()
        names = {r.name for r in rxn_set}
        tags_set = set().union(*(r.tags for r in rxn_set))
        pending = reqs - tags_set - names
        for req in pending:
            if not fnmatch.filter(names, req):
                return False
        return True

[docs]    def getReactionSmiles(self):
        """
        Return a representation of the current node as a reaction SMILES
        (not SMARTS!). The SMILES are kekulized and with explicit single bonds
        where applicable, to maximize compatibility with the sketcher.

        :return: reaction SMILES (retrosynthetic)
        :rtype: str
        """
        reactants = []
        for p in self.precursors:
            reactants.append(get_kekule_smiles(p.mol))
        return get_kekule_smiles(self.mol) + ">>" + ".".join(reactants)


[docs]class ReagentNode(RouteNode):
    """
    A node representing a starting material in a synthetic route. Unlike
    RouteNode, it cannot have any reaction instances. Reagent nodes are
    identified by a reagent class.

    Reagents may optionally have a filename or a smiles or a list of smiles as a
    source of reagent molecules. If none of these is provided, the object can try
    to find a reagent file based on the reagent class alone.
    """

[docs]    def __init__(
        self,
        mol=None,
        reagent_class=None,
        filename=None,
        smiles=None,
        smiles_list=None,
        frozen_atoms=None,
        innerfrozen_atoms=None,
    ):
        """
        :type reagent_class: str
        :type filename: str or NoneType
        :type smiles: str or NoneType
        :type smiles_list: list of str or NoneType

        :param frozen_atoms: 1-based indexes of frozen atoms, refering to the
            atom order in the SMILES string. If smiles_list is provided, it
            must have one smiles.
        :type frozen_atoms: list of int or NoneType
        """
        super().__init__(mol=mol, reagent_class=reagent_class)
        self.filename = filename
        if smiles_list:
            self.smiles_list = smiles_list
        elif smiles:
            self.smiles_list = [smiles]
        elif mol:
            self.smiles_list = [Chem.MolToSmiles(mol)]
        else:
            self.smiles_list = []

        if frozen_atoms is not None:
            if len(self.smiles_list) != 1:
                raise ValueError(
                    'Frozen atoms only supported for a single SMILES')
            self.frozen_atoms = frozen_atoms
            # Old route files don't have innerfrozen_atoms, so default to emtpy.
            self.innerfrozen_atoms = innerfrozen_atoms or []
        else:
            self.frozen_atoms, self.innerfrozen_atoms = self._getFrozenAtomsFromMol(
            )

    def __str__(self):
        attrs = []
        if self.mol:
            attrs.append(Chem.MolToSmiles(self.mol))
        if self.reagent_class:
            attrs.append('reagent="%s"' % self.reagent_class)
        if self.filename:
            attrs.append('file="%s"' % self.filename)
        if self.smiles_list:
            attrs.append('smiles=%s' % self.smiles_list)
        return "%s" % ('; '.join(attrs))

[docs]    def findReagentFile(self, libpath=None):
        """
        First, look for structure files matching <reagent_class>.* in the CWD.
        If one is found, return it. If multiple matches are found, an exception
        is raised. If none are found, look for <reagent_class>.csv in the
        mmshare data directory and return it if it exists, or None otherwise.

        :param libpath: list of directories to prepend to the standard reagent
                        library search path
        :type libpath: list of str

        :return: path to reagent file, or None if not found
        :rtype: str

        :raises: ValueError if multiple matches are found in the CWD.
        """
        try:
            return self.reagent_class.findReagentFile(libpath)
        except AttributeError:
            rc = reaction.ReagentClass(self.reagent_class)
            return rc.findReagentFile(libpath)

[docs]    def getReagentSource(self, source=None, libpath=None):
        """
        Return a reagent source, which may be either a filename, a SMILES
        string, or the magical value route.SMILES which means "use the SMILES
        contained in the node".

        Precedence is the supplied source argument, if any, followed by filename
        associated with the node, reagent class associated with the node, and
        SMILES associated with the node.

        :param source: optional filename/SMILES to use; special value '' (empty
                       strings) means use SMILES associated with the node.
        :type source: str

        :param libpath: list of directories to prepend to the standard reagent
                        library search path
        :type libpath: list of str

        :return: reagent source
        :rtype: str

        :raises: ValueError when no source file is found, the supplied SMILES is
                 invalid, or when there is neither a reagent class nor a SMILES
                 associated with the node.
        """
        if source:
            if os.path.isfile(source) or Chem.MolFromSmiles(source):
                return source
            else:
                raise ValueError(f'"{source}" for reagent {self} '
                                 'is neither a valid file nor a valid SMILES')
        elif source is None:
            if self.filename:
                return jobcontrol.get_runtime_path(self.filename)
            elif self.reagent_class:
                source_from_path = self.findReagentFile(libpath)
                if source_from_path is None:
                    raise ValueError('No reagent file found in search path'
                                     f' for {self.reagent_class}')
                return source_from_path
            else:
                source = ''  # Fall back to SMILES from node.
        if source == '' and self.smiles_list:
            return SMILES
        else:
            raise ValueError(f"No reagent source defined for {self} and node "
                             "contains no SMILES, reagent class, or filename")

    def _getNodeData(self):
        node = {}
        if self.smiles_list:
            node['smiles_list'] = self.smiles_list
        if self.reagent_class:
            node['reagent'] = str(self.reagent_class)
        if self.filename:
            node['file'] = self.filename
        if self.frozen_atoms:
            node['frozen_atoms'] = self.frozen_atoms
        if self.innerfrozen_atoms:
            node['innerfrozen_atoms'] = self.innerfrozen_atoms
        return node

    def _getFrozenAtomsFromMol(self):
        """
        Return a list of 1-based frozen atom indexes (in SMILES order)
        based on the Mol stored by the ReagentNode. Atoms in the Mol are
        identified as frozen by having a true value for FROZEN_PROP.

        :return: lists of frozen and innerfrozen atom indexes
        :rtype: ([int], [int])
        """
        frozen_atoms = []
        innerfrozen_atoms = []
        if self.mol:
            atom_map = self._getSmilesAtomOutputOrder()
            for atom in self.mol.GetAtoms():
                idx = atom.GetIdx()
                try:
                    if atom.GetBoolProp(FROZEN_PROP):
                        frozen_atoms.append(atom_map[idx] + 1)
                except KeyError:
                    pass
                try:
                    if atom.GetBoolProp(INNERFROZEN_PROP):
                        innerfrozen_atoms.append(atom_map[idx] + 1)
                except KeyError:
                    pass
        return sorted(frozen_atoms), sorted(innerfrozen_atoms)

[docs]    def getTargetIndexMap(self, sm_index):
        """
        Return map of (reagent_index, atom_index) pairs to target_indices,
        which will be used to align products by frozen atom.
        :param sm_index: the reagent index to return the map for.
        :type sm_index: int

        :return: map of reagent_index, atom_index pairs to target_indices
        :rtype: {(reagent_index, atom_index): target_index}
        """
        target_atom_index_map = {}
        if self.mol:
            atom_map = self._getSmilesAtomOutputOrder()
            for atom in self.mol.GetAtoms():
                idx = atom.GetIdx()
                try:
                    if atom.GetBoolProp(FROZEN_PROP):
                        target_idx = atom.GetIntProp(TARGET_ATOM_IDX_PROP)
                        target_atom_index_map[str(
                            (sm_index, atom_map[idx] + 1))] = target_idx
                except KeyError:
                    pass
        return target_atom_index_map

    def _getSmilesAtomOutputOrder(self):
        """
        Return a map of the atom indices in the current (stored)
        mol to the order in which the atoms were output during smiles
        generation.

        :return: Map from enumeration order to atom indices
        :rtype: {int: int}
        """

        output_order = self.mol.GetPropsAsDict(
            includePrivate=True, includeComputed=True)['_smilesAtomOutputOrder']
        return {o: i for i, o in enumerate(output_order)}

[docs]    def getLabeledMols(self, sm_index):
        """
        Return Mol objects, based on the SMILES held by the node, in
        which frozen atoms have been labeled using isotopes according to
        the formula

            label = FROZEN_FACTOR * sm_index + atom_index

        where atom_index is the 1-based index from the SMILES string,
        and innerfrozen atoms have been labeled according to

            label = INNERFROZEN_OFFSET + FROZEN_FACTOR * sm_index + atom_index

        :param sm_index: "starting material index". Used to distinguish between
            frozen atoms coming from different reactants.
        :type sm_index: int

        :return: labeled molecules
        :rtype: list of rdkit.Chem.Mol
        """
        mols = []
        for smiles in self.smiles_list:
            mol = Chem.MolFromSmiles(smiles)
            cxsmiles = mol_to_nomap_cxsmiles(mol)
            mol.SetProp(CXSMILES_PROP, cxsmiles)

            _encode_frozen_atoms_into_mol(mol, sm_index, self.frozen_atoms,
                                          self.innerfrozen_atoms)
            mols.append(mol)
        return mols


def _encode_frozen_atoms_into_mol(mol, sm_index, frozen_atoms,
                                  innerfrozen_atoms):
    for idx in frozen_atoms:
        label = _encode_frozen_props(idx, sm_index, innerfrozen=False)
        mol.GetAtomWithIdx(idx - 1).SetIsotope(label)
    for idx in innerfrozen_atoms:
        label = _encode_frozen_props(idx, sm_index, innerfrozen=True)
        mol.GetAtomWithIdx(idx - 1).SetIsotope(label)


def _encode_frozen_props(atom_idx, sm_index, innerfrozen):
    offset = INNERFROZEN_OFFSET if innerfrozen else 0
    return offset + FROZEN_FACTOR * sm_index + atom_idx


def _decode_frozen_props(iso):
    if (iso < FROZEN_FACTOR):
        return None
    inner_frozen = iso > INNERFROZEN_OFFSET
    iso = iso - INNERFROZEN_OFFSET if inner_frozen else iso
    reagent_index = int(iso / FROZEN_FACTOR)
    atom_index = iso % FROZEN_FACTOR
    return {
        "inner_frozen": inner_frozen,
        "reagent_index": reagent_index,
        "source_index": atom_index
    }


[docs]def read_route_file(filename, reactions_dict=None):
    """
    Read a route file in JSON format, returning a RouteNode object.

    :param reactions_dict: dictionary of Reaction objects by name.
    :type reactions_dict: dict of {str: Reaction}
    """
    with open(filename) as fh:
        tree = json.load(fh)
        return parse_route_data(tree, reactions_dict)


[docs]def parse_route_data(json_data, reactions_dict=None):
    """
    Generate a Route from the raw dict/list-based data structure usually
    obtained from a route JSON file.

    :param reactions_dict: dictionary of Reaction objects by name. Not required
        when using a self-contained route file.
    :type reactions_dict: dict of {str: Reaction}
    """
    if _is_self_contained(json_data):
        tree = route_file_schema(json_data['route'])
        reactions_dict = reaction.parse_reaction_data(json_data['reactions'])
    else:
        tree = route_file_schema(json_data)
    return _parse_route_node(tree, reactions_dict)


def _is_self_contained(json_data):
    """
    Return true if 'json_data' looks like the representation of a self-contained
    route file.

    :param json_data: body of route file
    :type json_data: dict

    :return: Does it look like a self-contained route?
    :rtype: bool
    """
    try:
        self_contained_route_schema(json_data)
        return True
    except voluptuous.Invalid:
        return False


def _parse_route_node(node, reactions_dict):
    if 'reaction_name' in node:
        reaction = reactions_dict[node['reaction_name']]
        precursors = [
            _parse_route_node(p, reactions_dict) for p in node['precursors']
        ]
        fr = node.get('fallback_reagent')
        if fr is not None and not (0 < fr <= len(precursors)):
            raise ValueError(f'Invalid fallback_reagent {fr} for a route node '
                             f'with {len(precursors)} precursors')
        smiles = node.get('smiles')
        mol = Chem.MolFromSmiles(smiles) if smiles else None
        route = RouteNode(mol=mol,
                          ncycles=node.get('ncycles'),
                          fallback_reagent=fr)
        route.setReactionInstance(reaction, precursors)
    else:
        route = ReagentNode(
            reagent_class=node.get('reagent'),
            filename=node.get('file'),
            smiles=node.get('smiles'),
            smiles_list=node.get('smiles_list'),
            frozen_atoms=node.get('frozen_atoms'),
            innerfrozen_atoms=node.get('innerfrozen_atoms'),
        )
    return route


[docs]def get_kekule_smiles(mol):
    """
    Return a Kekule SMILES, with explicit single bonds, for a molecule.

    :type mol: input molecule (not modified)
    :type mol: rdkit.Chem.Mol

    :return: Kekule SMILES
    :rtype: str
    """
    kmol = copy.copy(mol)
    Chem.Kekulize(kmol)
    return Chem.MolToSmiles(kmol, kekuleSmiles=True, allBondsExplicit=True)


[docs]class LazyIterable:
    """
    Lazily convert an iterator into an iterable. One could convert an iterator
    into a list, but that would consume the entire iterator upfront. This class
    only consumes as needed, but remembers everything that has been consumed so
    it can be reused.
    """

[docs]    def __init__(self, iterator):
        self.iterator = iterator
        self.pos = 0
        self.cache = []

    def __iter__(self):
        for i in itertools.count():
            if i < len(self.cache):
                yield self.cache[i]
            else:
                try:
                    val = next(self.iterator)
                except StopIteration:
                    break
                self.cache.append(val)
                yield val


def _lazy_product(*iterables):
    if not iterables:
        yield tuple()
    else:
        for i in iterables[0]:
            for j in _lazy_product(*iterables[1:]):
                yield (i,) + j


[docs]def lazy_product(*iterators):
    """
    Like itertools.product, but does not consume the iterators before starting
    to yield tuples. For example, before yielding the first tuple, only the
    first element from each iterator gets consumed.

    :param iterators: iterators

    :return: generator of tuples
    """
    iterables = [LazyIterable(it) for it in iterators]
    yield from _lazy_product(*iterables)


[docs]def parse_bond_reactions(vals):
    """
    Parse a list of bond reaction spec strings such as

        ['1-2:suzuki,stille', '8-5:amide_coupling-1']

    into a dict structure expected by the RouteNode APIs:

        {(1, 2): {'suzuki', 'stille'}, (5, 8): {'amide_coupling-1'}}

    :param vals: values from the -bond_reactions argument
    :type vals: list of str

    :return: dict where the keys are bonds (tuples of two atom indexes) and the
        values are sets of reaction names.
    :rtype: {(int, int): {str}}
    """
    bond_reactions = {}
    for val in vals:
        bond_str, reactions_str = val.split(':')
        a1, a2 = bond_str.split('-')
        bond = tuple(sorted([int(a1), int(a2)]))
        reactions = set(reactions_str.split(','))
        bond_reactions[bond] = reactions
    return bond_reactions


[docs]def mol_to_nomap_cxsmiles(mol):
    """
    Generate a CXSMILES from a Mol, but stripped of atom mappings. For example,
    return ``CCO`` instead of ``CC[OH:3] |atomProp:2.molAtomMapNumber.3|``.

    :type mol: input molecule (not modified)
    :type mol: rdkit.Chem.Mol

    :return: CXSMILES
    :rtype: str
    """
    tmp_mol = copy.copy(mol)
    for atom in tmp_mol.GetAtoms():
        atom.SetAtomMapNum(0)
    return Chem.MolToCXSmiles(tmp_mol)