# Source code for schrodinger.application.pathfinder.reaction

"""
Classes to represent and apply chemical reactions.

Examples::

    # run one reaction
    rxn_smarts = "[C:8][N:7][C:2]([C:1])=[O:4]>>[C:1][C:2](O)=[O:4].[N:7][C:8]"
    rxn = Reaction("Amide coupling", rxn_smarts)
    amide = Chem.MolFromSmiles('CC(=O)NC')
    for acid, amine in rxn.apply((amide,)):
        print(Chem.MolToSmiles(acid))
        print(Chem.MolToSmiles(amine))
    reverse_rxn = rxn.inverse()

    # read reactions from file:
    reactions = read_reactions_file('reactions.json')

"""

import collections
import glob
import itertools
import json
import os

import voluptuous
from rdkit import Chem
from rdkit.Chem import AllChem
from voluptuous import All
from voluptuous import Any
from voluptuous import Length
from voluptuous import Required
from voluptuous import Schema

from schrodinger import structure
from schrodinger.rdkit import molio
from schrodinger.utils import fileutils
from schrodinger.utils import log

# Module-level logger used for all debug output in this module.
logger = log.get_output_logger("pathfinder")

# Symbolic constants for keywords used in JSON file
RETRO_SMARTS = 'retro_smarts'
SYN_SMARTS = 'syn_smarts'
# 'smarts' and 'inverse_smarts' are accepted by Reaction.__init__ as synonyms
# of 'retro_smarts' and 'syn_smarts' for backward compatibility.
SMARTS = 'smarts'
INVERSE_SMARTS = 'inverse_smarts'
TIER = 'tier'
ALLOW_MULTIPLE_PRODUCTS = 'allow_multiple_products'
RXNFILE = 'rxnfile'
INVERSE_RXNFILE = 'inverse_rxnfile'
TAGS = 'tags'
LHS_CLASSES = 'lhs_classes'
RHS_CLASSES = 'rhs_classes'
DESCRIPTION = 'description'
REACTIVE = 'reactive'
LONG_NAME = 'long_name'
LD_DATA = 'ld_data'

# Tag to be added to reactions read from the default reactions file.
DEFAULT_LIB_TAG = 'default'

# Per-user reaction and reagent class files, located in the local application
# data directory.
CUSTOM_REACTIONS_LIBRARY = os.path.join(
    fileutils.get_directory_path(fileutils.LOCAL_APPDATA), 'reactions.json')
CUSTOM_REAGENTS_LIBRARY = os.path.join(
    fileutils.get_directory_path(fileutils.LOCAL_APPDATA),
    'reagent_classes.json')

# Schemas for validating JSON files.
# A reactions file maps each reaction name to a dict of reaction properties.
reaction_file_schema = Schema({
    str: {
        Required(Any(SMARTS, RETRO_SMARTS, RXNFILE)): str,
        INVERSE_SMARTS: str,
        SYN_SMARTS: str,
        INVERSE_RXNFILE: str,
        LONG_NAME: str,
        DESCRIPTION: str,
        LHS_CLASSES: [str, None],
        RHS_CLASSES: [str, None],
        TAGS: [str],
        TIER: int,
        ALLOW_MULTIPLE_PRODUCTS: bool,
        LD_DATA: object,
    }
})

# A reagent classes file maps each class name to a dict of class properties.
reagent_class_file_schema = Schema({
    str: {
        REACTIVE: bool,
        DESCRIPTION: Any(str, None),
        SMARTS: Any(str, [str], None),
    }
})

# This is a simplified schema because each section will be
# validated later anyway using its own schema.
pathfinder_file_schema = Schema(
    {
        Required('version'): int,
        Required('reactions'): dict,
        Required('reagent_classes'): dict,
        Required('inverse_tags'): list,
    },
    extra=True)

# Inverse tags are a list of pairs; each member of a pair is a list of tags.
inverse_tags_schema = Schema([All(Length(2, 2), [[str]])])

# Name of the environment variable read by get_libpath().
LIBPATH_ENVVAR = 'SCHRODINGER_REAGENT_LIB'

# Query used by Reaction.__init__ to detect explicit hydrogens in reactant
# templates.
H = Chem.MolFromSmarts('[H]')


class Reaction:
    """
    A Reaction object represents a generic reaction, such as "amide coupling".
    An actual instance of a reaction between specific reagents is a
    ReactionInstance (see below).

    A Reaction may optionally be associated with "reagent classes" for the
    molecules on the left-hand-side and right-hand-side of the reaction. We
    avoid the terms "reactants" and "products" because they depend on the
    context; Reaction objects may be used for actual reactions, but also for
    retrosynthetic transforms (where "target" and "precursors" would be more
    appropriate), or even for alchemical transformations.

    A reagent class is just a name representing the type of compound involved
    in the reaction; the naming scheme is up to the caller. For example, for
    amide coupling, the reagent classes on one side might be "amine" and "acid",
    and on the other "amide".
    """

    def __init__(self,
                 name,
                 retro_smarts=None,
                 lhs_classes=None,
                 rhs_classes=None,
                 syn_smarts=None,
                 tags=None,
                 tier=None,
                 allow_multiple_products=False,
                 rxnfile=None,
                 inverse_rxnfile=None,
                 description=None,
                 reagents_dict=None,
                 long_name=None,
                 ld_data=None,
                 require_mapping=True,
                 **kw):
        """
        :param name: Reaction name.
        :type name: str

        :param retro_smarts: Reaction SMARTS (RDKit dialect).
        :type retro_smarts: str

        :param lhs_classes: Reagent classes for the left-hand-side of the
                reaction.
        :type lhs_classes: list of str or NoneType

        :param rhs_classes: Reagent classes for the right-hand-side of the
                reaction.
        :type rhs_classes: list of str or NoneType

        :param syn_smarts: Reaction SMARTS for the reverse reaction. If not
                specified, it is constructed automatically from 'retro_smarts'
                by swapping around the ">>".
        :type syn_smarts: str or NoneType

        :param tags: Optional list of tags associated with the reaction.
        :type tags: iterable of str or NoneType

        :param tier: an integer describing how "good" the reaction is (lower
                is better)
        :type tier: int

        :param allow_multiple_products: ignored for backward compatibility

        :param rxnfile: Path to MDL Rxnfile. May be used instead of
                'retro_smarts'.
        :type rxnfile: str

        :param inverse_rxnfile: Path to MDL Rxnfile for the reverse reaction.
                May be used instead of 'syn_smarts'.
        :type inverse_rxnfile: str

        :param description: Reaction description.
        :type description: str

        :param reagents_dict: Optional dictionary of reagent classes by name.
                Class names found in this dictionary are replaced by the
                corresponding object; other names get a new ReagentClass.
        :type reagents_dict: dict {str: ReagentClass} or NoneType

        :param long_name: Optional long name; stored as given.
        :type long_name: str or NoneType

        :param ld_data: arbitrary object reserved for the use of LiveDesign.
        :type ld_data: object

        :param require_mapping: if True, raise a ValueError for reactions
            without any atom mappings
        :type require_mapping: bool

        :param kw: 'smarts' and 'inverse_smarts' are accepted as synonyms of
            'retro_smarts' and 'syn_smarts' for backward compatibility. Any
            other extra keyword is ignored.

        :raises ValueError: if both or neither of SMARTS and rxnfile are
            given, if both an inverse SMARTS and an inverse rxnfile are given,
            if the number of reagent classes doesn't match the reaction, or
            if 'require_mapping' is set and the reaction has no atom mappings.
        """
        self.name = name
        self.description = description
        self.rxnfile = rxnfile
        self.inverse_rxnfile = inverse_rxnfile
        # Allow smarts as synonym for backward compatibility.
        self.retro_smarts = retro_smarts or kw.get('smarts')
        # Exactly one of SMARTS and rxnfile must be provided.
        if bool(self.retro_smarts) == bool(rxnfile):
            raise ValueError('Reaction requires either smarts or rxnfile')
        if rxnfile:
            self.rxn = AllChem.ReactionFromRxnFile(rxnfile)
            # Keep a SMARTS representation around for internal use.
            self.retro_smarts = AllChem.ReactionToSmarts(self.rxn)
        else:
            self.rxn = AllChem.ReactionFromSmarts(self.retro_smarts)
        # Allow inverse_smarts as synonym for backward compatibility.
        syn_smarts = syn_smarts or kw.get('inverse_smarts')
        if syn_smarts and self.inverse_rxnfile:
            raise ValueError(
                "Can't specify both syn_smarts/inverse_smarts and inverse_rxnfile"
            )
        self.syn_smarts = syn_smarts or invert_reaction_smarts(
            self.retro_smarts)
        self.lhs_classes = self._validateReagentClasses(lhs_classes, False,
                                                        reagents_dict)
        self.rhs_classes = self._validateReagentClasses(rhs_classes, True,
                                                        reagents_dict)
        self.tags = set(tags) if tags else set()
        self.tier = tier
        self.long_name = long_name
        self.ld_data = ld_data

        # When the reactant templates mention explicit hydrogens or wildcard
        # atoms, apply() adds hydrogens to the reactants before running the
        # reaction and strips them from the products afterwards.
        reactants = self.rxn.GetReactants()
        self.explicit_h = (any(mol.HasSubstructMatch(H) for mol in reactants) or
                           any(_has_wildcards(mol) for mol in reactants))
        if require_mapping:
            self._validateAtomMappings()

    def __str__(self):
        return self.name

    def asDict(self):
        """
        Return a dict representation of the reaction suitable for JSON
        serialization.

        :return: dict
        :rtype: dict
        """
        keys = (RETRO_SMARTS, SYN_SMARTS, TIER, LONG_NAME, DESCRIPTION)
        d = {}
        for key in keys:
            val = getattr(self, key, None)
            if val is not None:
                d[key] = val
        # special cases
        if RETRO_SMARTS not in d:
            d[RXNFILE] = self.rxnfile
        if SYN_SMARTS not in d:
            d[INVERSE_RXNFILE] = self.inverse_rxnfile
        if self.tags:
            d[TAGS] = list(self.tags)

        def stringify_list(a):
            return [str(i) if i is not None else None for i in a]

        if any(self.rhs_classes):
            d[RHS_CLASSES] = stringify_list(self.rhs_classes)
        if any(self.lhs_classes):
            d[LHS_CLASSES] = stringify_list(self.lhs_classes)
        return d

    def _validateReagentClasses(self, classes, rhs, reagents_dict=None):
        """
        Check that the number of reagent classes matches the number of
        templates on the requested side of the reaction and convert the
        class names into ReagentClass objects.

        :param classes: reagent class names (or ReagentClass objects), with
            None for molecules without a class; None means "no classes".
        :type classes: list or NoneType

        :param rhs: if True use the product templates, else the reactant
            templates.
        :type rhs: bool

        :param reagents_dict: optional dictionary of reagent classes by name
        :type reagents_dict: dict {str: ReagentClass} or NoneType

        :return: list with one ReagentClass or None per template
        :rtype: list

        :raises ValueError: if that side of the reaction has no templates or
            the number of classes doesn't match the number of templates.
        """
        if rhs:
            expected = self.rxn.GetNumProductTemplates()
        else:
            expected = self.rxn.GetNumReactantTemplates()
        if expected < 1:
            raise ValueError("Reaction must have at least one reactant "
                             "and one product")
        if classes is None:
            return [None] * expected
        if len(classes) != expected:
            raise ValueError("invalid number of reagent classes for '%s': "
                             "got %d, expected %d" %
                             (self.name, len(classes), expected))
        # Convert list of class names into list of ReagentClass objs.
        class_objs = []
        for cls in classes:
            if reagents_dict is not None and cls in reagents_dict:
                class_objs.append(reagents_dict[cls])
            elif not cls:
                class_objs.append(None)
            elif isinstance(cls, ReagentClass):
                # Already a ReagentClass (e.g., when called via inverse());
                # keep it as is so that its metadata is not lost.
                class_objs.append(cls)
            else:
                class_objs.append(ReagentClass(cls))
        return class_objs

    def _validateAtomMappings(self):
        """
        Raise a ValueError unless at least one atom map number appears on
        both sides of the reaction.
        """
        left = {
            atom.GetAtomMapNum()
            for mol in self.rxn.GetReactants()
            for atom in mol.GetAtoms()
        }
        right = {
            atom.GetAtomMapNum()
            for mol in self.rxn.GetProducts()
            for atom in mol.GetAtoms()
        }
        left.discard(0)  # 0 comes from unmapped atoms
        right.discard(0)
        if not left & right:
            raise ValueError('Reaction has no atom mappings: %s (%s)' %
                             (self.name, self.retro_smarts))

    def apply(self, reactants):
        """
        Apply the reaction to the given reactants, returning a list of lists of
        products. The products are already sanitized. Product sets which fail
        sanitization are skipped, and an empty list is returned if the
        reaction itself can't be run on the reactants.

        :type reactants: list Mol
        :rtype: list of list of Mol
        """
        if self.explicit_h:
            reactants = [Chem.AddHs(mol) for mol in reactants]
        debug_mols('Applying %s to ' % self, reactants)
        sanitized_product_lists = []
        try:
            product_lists = self.rxn.RunReactants(reactants)
        except Exception as exc:
            logger.debug('Could not run %s: %s', self, exc)
            return []
        if not product_lists and logger.getEffectiveLevel() <= log.DEBUG:
            self.debugReactants(reactants)
        for products in product_lists:
            try:
                debug_mols('Products before sanitization: ', products)
                if self.explicit_h:
                    products = [Chem.RemoveHs(mol) for mol in products]
                for product in products:
                    Chem.rdmolops.SanitizeMol(product)
                sanitized_product_lists.append(products)
            except Exception as exc:
                # Best effort: skip this product set but let
                # KeyboardInterrupt/SystemExit propagate.
                logger.debug('Skipping products of %s: %s', self, exc)
        return sanitized_product_lists

    def inverse(self):
        """
        Return a new Reaction object for the inverse of the current reaction.
        The left- and right-hand-side reagent classes are swapped, and the
        name, tags, and tier are copied; description, long name, and ld_data
        are not passed on.
        """
        # We don't pass smarts if rxnfile is available to avoid raising an
        # exception. While the caller can't pass both smarts and rxnfile to
        # __init__, the object can actually have both because we construct
        # smarts from a rxnfile for internal use.
        return self.__class__(
            self.name,
            retro_smarts=self.syn_smarts if not self.inverse_rxnfile else None,
            lhs_classes=self.rhs_classes,
            rhs_classes=self.lhs_classes,
            syn_smarts=self.retro_smarts if not self.rxnfile else None,
            tags=self.tags,
            tier=self.tier,
            rxnfile=self.inverse_rxnfile,
            inverse_rxnfile=self.rxnfile)

    def suggestReagentClasses(self, r_classes, libpath=None, max_search=10):
        """
        Search through r_classes for reagent classes matching each of the
        reactants in the reaction.

        :param r_classes: dictionary of reagent classes by name
        :type r_classes: dict of ReagentClass

        :param libpath: list of directories to prepend to the standard reagent
                        library search path
        :type libpath: list of str

        :param max_search: maximum number of structures to search from each
            reagent file before deciding the file doesn't match.
        :type max_search: int

        :return: list of lists of reagent classes. Each item in the outer list
            corresponds to one of the molecules on the right-hand side of the
            reaction. Items in the inner list (which may be empty) are the
            suggested classes for that molecule.
        :rtype: list of list of ReagentClass

        """
        suggested_all = []
        # The reactant templates of the inverse reaction are the queries.
        inv_rxn = self.inverse().rxn
        for i in range(inv_rxn.GetNumReactantTemplates()):
            query = inv_rxn.GetReactantTemplate(i)
            suggested = []
            for rc in r_classes.values():
                fname = rc.findReagentFile(libpath=libpath)
                if fname is None:
                    continue
                reader = molio.get_mol_reader(fname)
                # One matching structure among the first max_search is enough.
                for mol in itertools.islice(reader, 0, max_search):
                    if mol.HasSubstructMatch(query):
                        suggested.append(rc)
                        break
            suggested_all.append(suggested)
        return suggested_all

    def getDisplaySmarts(self):
        """
        Return the display SMARTS for the reaction. This is a simplified
        SMARTS meant for depiction purposes only. If the reaction doesn't
        have a display SMARTS, the syn_smarts is returned.

        :return: display SMARTS
        :rtype: str
        """
        if self.ld_data and self.ld_data.get('display_smarts'):
            # Strip CXSMARTS extensions, which go after a space.
            return self.ld_data['display_smarts'].split()[0]
        else:
            return self.syn_smarts

    def debugReactants(self, reactants):
        """
        Print each reactant SMILES and reactant template SMARTS and whether they
        match. This helps debugging a reaction SMARTS by making it easier to see
        which component to focus on.

        :param reactants: list of reactants
        :type reactants: [Chem.Mol]
        """
        for tpl, mol in zip(self.rxn.GetReactants(), reactants):
            logger.debug('- {} {} {}'.format(mol.HasSubstructMatch(tpl),
                                             Chem.MolToSmiles(mol),
                                             Chem.MolToSmarts(tpl)))


class ReactionDict(dict):
    """
    Reaction dictionary with additional properties for reagent classes
    and inverse tags.

    :ivar reagent_classes: reagent classes
    :ivar inverse_tags: inverse tags
    """

    def __init__(self, reactions, reagent_classes, inverse_tags):
        """
        :param reactions: dict of reactions by name
        :type reactions: {str: Reaction}

        :param reagent_classes: dict of reagent classes by name
        :type reagent_classes: {str: ReagentClass}

        :param inverse_tags: dict of inverse tags for each tag
        :type inverse_tags: {str: set(str)}
        """
        super().__init__(reactions)
        self.reagent_classes = reagent_classes
        self.inverse_tags = inverse_tags


class ReagentClass:
    """
    Struct-like class to hold metadata for a reagent class. Fields include name,
    description, and 'reactive'. A reactive reagent is one which shouldn't be
    analyzed retrosynthetically. For example, acyl chlorides.
    """

    def __init__(self, name, reactive=False, description=None, smarts=None):
        """
        :param name: reagent class name (converted to str)
        :type name: object

        :param reactive: whether the reagent class is reactive
        :type reactive: bool

        :param description: reagent class description
        :type description: str or NoneType

        :param smarts: SMARTS pattern(s) for the class. A single string is
            stored as a one-element list.
        :type smarts: str, list of str, or NoneType
        """
        self.name = str(name)
        self.reactive = reactive
        self.description = description
        if isinstance(smarts, str):
            self.smarts = [smarts]
        else:
            self.smarts = smarts

    def __str__(self):
        return self.name

    def asDict(self):
        """
        Return a dict representation of the reagent class suitable for JSON
        serialization.

        :return: dict
        :rtype: dict
        """
        smarts = self.smarts
        # When the SMARTS list is of length 1, which is the normal case,
        # write it out as str instead of list of str to make it readable
        # by older versions of the software.
        if isinstance(smarts, list) and len(smarts) == 1:
            smarts = smarts[0]
        return {
            'reactive': self.reactive,
            'description': self.description,
            'smarts': smarts,
        }

    def findReagentFile(self, libpath=None):
        """
        Look for <reagent_class>.* (where the extension corresponds to a
        recognized structure file format) in each of the directories in the
        library search path. Return the first match, but if multiple matches
        are found in the same directory, raise an exception.

        :param libpath: list of directories to prepend to the default reagent
                        library search path
        :type libpath: list of str

        :return: path to reagent file, or None if not found
        :rtype: str

        :raises: ValueError if multiple matches are found in the same
            directory.
        """
        # Copy the caller's list so that it is not modified.
        libpath = [] if libpath is None else libpath[:]
        libpath += get_libpath()
        for libdir in libpath:
            patt = os.path.join(libdir, f'{self.name}.*')
            matches = [f for f in glob.glob(patt) if is_reagent_filename(f)]
            if len(matches) == 1:
                return matches[0]
            elif len(matches) > 1:
                raise ValueError('Multiple files found for reagent '
                                 'in the same directory: %s' %
                                 ','.join(matches))
        return None

    def size(self, libpath=None):
        """
        Return the number of structures in the reagent file for this class.

        :param libpath: list of directories to prepend to the standard reagent
                        library search path
        :type libpath: list of str

        :return: number of structures, or zero when no file is found.
        :rtype: int
        """
        filename = self.findReagentFile(libpath=libpath)
        if filename is None:
            return 0
        if molio.is_pfx(filename):
            return molio.get_pfx_size(filename)
        elif molio.is_csvgz(filename):
            return len(molio.CsvMolReader(filename))
        elif fileutils.is_csv_file(filename):
            # Much faster than count_structures until SHARED-6205 is fixed.
            # One line is subtracted from the line count (presumably the
            # header line).
            return max(fileutils.count_lines(filename) - 1, 0)
        else:
            return structure.count_structures(filename)

    def copyReagentFile(self, dest_file, libpath=None):
        """
        Exports reagents into a given .csv file.

        :param dest_file: output .csv file
        :type dest_file: str

        :param libpath: list of directories to prepend to the standard reagent
                        library search path
        :type libpath: list of str

        :return: False if the reagent file was not found or is not in one of
            the supported formats, and True otherwise
        :rtype: bool
        """
        filename = self.findReagentFile(libpath=libpath)
        if filename is None:
            return False
        if molio.is_pfx(filename):
            molio.extract_structures(filename, dest_file)
        elif fileutils.is_csv_file(filename) or molio.is_csvgz(filename):
            molio.copy_csv_file(filename, dest_file)
        else:
            # Invalid reagent class type
            return False

        return True


def read_reactions_file(filename=None,
                        reagents_dict=None,
                        merge_with_defaults=False):
    """
    Read a reactions file in JSON format and return a dictionary of reactions
    by name. If filename is None, read the standard reactions file from
    the mmshare data directory and add the 'default' tag to each reaction.

    :param filename: file to read
    :type filename: str

    :param reagents_dict: dictionary of reagent classes (normally from
        read_reagent_classes_file).
    :type reagents_dict: dict {str: ReagentClass}

    :param merge_with_defaults: if True, read both the default file and the
        specified file and merge them. Reactions from the specified file
        replace default reactions with the same name.
    :type merge_with_defaults: bool

    :return: dictionary of reactions by name
    :rtype: dict {str: Reaction}
    """
    if filename and merge_with_defaults:
        # Read the defaults first so that user reactions take precedence.
        merged = read_reactions_file(reagents_dict=reagents_dict)
        user_reactions = read_reactions_file(filename,
                                             reagents_dict=reagents_dict)
        merged.update(user_reactions)
        return merged

    use_default = filename is None
    filename = filename or get_default_reactions_filename()
    logger.debug('Reading reactions file: %s', filename)
    # As a special case we'll ignore missing ~/.schrodinger/reactions.json
    # because too much panel code depends on this file existing.
    if (filename == CUSTOM_REACTIONS_LIBRARY and
            not os.path.isfile(filename)):
        return {}
    with open(filename) as fh:
        raw_dict = json.load(fh)
    reaction_dict = parse_reaction_data(raw_dict, reagents_dict)
    if use_default:
        for rxn in reaction_dict.values():
            rxn.tags.add(DEFAULT_LIB_TAG)
    return reaction_dict


def read_rxn_file(filename):
    """
    Read a reaction from a .rxn file and return a dictionary of reactions.
    The reaction is named after the file's base name, and the Reaction stored
    in the dictionary is the inverse of the one built from the file.

    :param filename: file to read
    :type filename: str

    :return: dictionary of reactions by name
    :rtype: dict {str: Reaction}
    """
    rxname = fileutils.get_basename(filename)
    reaction = Reaction(rxname, rxnfile=filename).inverse()
    return {rxname: reaction}


def read_reagent_classes_file(filename=None, merge_with_defaults=False):
    """
    Read a reagent classes file in JSON format and return a dictionary of
    reagent classes by name. If filename is None, read the standard reagent
    classes file from the mmshare data directory.

    :param filename: file to read
    :type filename: str

    :param merge_with_defaults: if True, read both the default file and the
        specified file and merge them. Classes from the specified file
        replace default classes with the same name.
    :type merge_with_defaults: bool

    :return: dictionary of reagent classes by name
    :rtype: dict {str: ReagentClass}
    """
    if filename and merge_with_defaults:
        default_classes = read_reagent_classes_file()
        user_classes = read_reagent_classes_file(filename)
        return {**default_classes, **user_classes}

    filename = filename or get_default_reagent_classes_filename()
    logger.debug('Reading reagent classes file: %s', filename)
    # As a special case we'll ignore a missing custom reagent classes file
    # (CUSTOM_REAGENTS_LIBRARY) because too much panel code depends on this
    # file existing.
    if (filename == CUSTOM_REAGENTS_LIBRARY and
            not os.path.isfile(filename)):
        return {}
    with open(filename) as fh:
        raw_dict = json.load(fh)
    return parse_reagent_classes_data(raw_dict)


def get_custom_reagent_classes(filename):
    """
    Read a reagent classes file in JSON format and return a dictionary of
    reagent classes by name. If the file is not found, an empty dictionary
    is returned; the file itself is not created.

    :param filename: file to read
    :type filename: str

    :return: dictionary of reagent classes
    :rtype: dict {str: ReagentClass}
    """
    try:
        return read_reagent_classes_file(filename=filename)
    except FileNotFoundError:
        # A missing file simply means there are no custom reagent classes.
        return {}


def get_default_reactions_filename():
    """
    Return the path to the default reactions file, which is 'pathfinder.json'
    in the mmshare data directory.

    :return: path
    :rtype: str
    """
    return os.path.join(fileutils.get_mmshare_data_dir(), 'pathfinder.json')


def get_default_reagent_classes_filename():
    """
    Return the path to the default reagent classes file, which is
    'pathfinder.json' in the mmshare data directory (the same file returned
    by get_default_reactions_filename).

    :return: path
    :rtype: str
    """
    return os.path.join(fileutils.get_mmshare_data_dir(), 'pathfinder.json')


def parse_inverse_tags(tag_tups):
    """
    Turn the list-based representation of inverse tags into a dictionary for
    ease of look up. For example, given ::

        [
            [['a', 'b'], ['c', 'd']],
            [['e'], ['f']]
        ]

    return ::

        {
            'a': {'c', 'd'},
            'b': {'c', 'd'},
            'c': {'a', 'b'},
            'd': {'a', 'b'},
            'e': {'f'},
            'f': {'e'}
        }

    :param tag_tups: a list of incompatible tag sets. In the example above,
                     reactions tagged "a" or "b" are considered inverse of
                     reactions tagged "c" or "d".
    :type tag_tups: list(list(list(str)))

    :return: dictionary where each key is a tag and each value is a set of
             tags considered "inverse" of the key. This is actually a
             defaultdict, where the default value is an empty set.
    :rtype: defaultdict(set(str))

    :raises voluptuous.Invalid: if tag_tups doesn't match the expected
        structure.
    """
    inverse_tags_schema(tag_tups)
    tag_dict = collections.defaultdict(set)
    for tags1, tags2 in tag_tups:
        # The relationship is symmetric, so record it in both directions.
        for t1 in tags1:
            tag_dict[t1].update(tags2)
        for t2 in tags2:
            tag_dict[t2].update(tags1)
    return tag_dict


def parse_pathfinder_data(raw_dict):
    """
    Convert a "raw dict" (usually from a pathfinder.json) into a ReactionDict.
    The reagent classes are parsed first so that the reactions can refer to
    them.

    :param raw_dict: contents of a PathFinder data file
    :type raw_dict: dict

    :rtype: ReactionDict
    """
    pathfinder_file_schema(raw_dict)
    reagent_classes = parse_reagent_classes_data(raw_dict['reagent_classes'])
    reactions = parse_reaction_data(raw_dict['reactions'], reagent_classes)
    inverse_tags = parse_inverse_tags(raw_dict['inverse_tags'])
    return ReactionDict(reactions, reagent_classes, inverse_tags)


def _is_pathfinder_data(json_data):
    """
    Tell whether 'json_data' validates against the schema of a "PathFinder
    data file".

    :param json_data: body of data file
    :type json_data: dict

    :return: does it look like a PathFinder data file?
    :rtype: bool
    """
    try:
        pathfinder_file_schema(json_data)
    except voluptuous.Invalid:
        return False
    return True


def parse_reaction_data(raw_dict, reagents_dict=None):
    """
    Convert a "raw dict" (usually from a JSON file) into a dictionary
    of Reaction objects by name. If the raw dict looks like the contents of
    a PathFinder data file, it is parsed as such and a ReactionDict is
    returned; in that case 'reagents_dict' is not used.

    :param raw_dict: raw reaction data
    :type raw_dict: dict

    :param reagents_dict: dictionary of reagent classes by name
    :type reagents_dict: dict {str: ReagentClass} or NoneType

    :rtype: dict {str: Reaction}
    """
    if _is_pathfinder_data(raw_dict):
        return parse_pathfinder_data(raw_dict)
    if 'reactions' in raw_dict:
        # To support route files as reaction files.
        raw_dict = raw_dict['reactions']
    reaction_file_schema(raw_dict)
    reaction_dict = {
        name: Reaction(name, reagents_dict=reagents_dict, **raw_reaction)
        for name, raw_reaction in raw_dict.items()
    }
    logger.debug('Read %d reactions', len(reaction_dict))
    return reaction_dict


def parse_reagent_classes_data(raw_dict):
    """
    Convert a "raw dict" (usually from a JSON file) into a dictionary
    of ReagentClass objects by name. If the raw dict looks like the contents
    of a PathFinder data file, its reagent classes are returned.

    :param raw_dict: raw reagent class data
    :type raw_dict: dict

    :rtype: dict {str: ReagentClass}
    """
    if _is_pathfinder_data(raw_dict):
        return parse_pathfinder_data(raw_dict).reagent_classes
    reagent_class_file_schema(raw_dict)
    return {
        name: ReagentClass(name, **raw_class)
        for name, raw_class in raw_dict.items()
    }


def write_reactions_file(reactions, filename):
    """
    Write a reactions file in JSON format. Each reaction is stored under its
    own name (dictionary keys are not used).

    :param reactions: list or dict of reactions
    :type reactions: list or dict of Reaction

    :param filename: file to write
    :type filename: str
    """
    # Accept both mappings (use their values) and plain iterables.
    values = getattr(reactions, 'values', None)
    rxns = values() if callable(values) else reactions
    rxn_dict = {rxn.name: rxn.asDict() for rxn in rxns}
    with open(filename, 'w') as fh:
        json.dump(rxn_dict, fh, indent=2, sort_keys=True)


def write_reagent_classes_file(reagent_classes, filename):
    """
    Write a reagent classes file in JSON format. Each class is stored under
    its own name (dictionary keys are not used).

    :param reagent_classes: dict of reagent classes by name
    :type reagent_classes: {str: ReagentClass}

    :param filename: file to write
    :type filename: str
    """
    raw_dict = {}
    for rgc in reagent_classes.values():
        raw_dict[rgc.name] = rgc.asDict()
    with open(filename, 'w') as fh:
        json.dump(raw_dict, fh, indent=2, sort_keys=True)


def invert_reaction_smarts(smarts):
    """
    Given a reaction SMARTS, return the reaction SMARTS for the reverse
    reaction. This is done by swapping the parts around the ">>" separator;
    a string without ">>" is returned unchanged.

    :type smarts: str

    :rtype: str
    """
    parts = smarts.split('>>')
    return '>>'.join(parts[::-1])


def get_mmshare_reagent_data_dir():
    """
    Return the path to the reagent directory installed with mmshare, which
    is the 'reagents' subdirectory of the mmshare data directory.

    :rtype: str
    """
    return os.path.join(fileutils.get_mmshare_data_dir(), 'reagents')


def get_libpath():
    """
    Return the reagent library search path. It consists of the directories
    listed in the SCHRODINGER_REAGENT_LIB environment variable, if it exists,
    followed by ~/.schrodinger/reagents or its Windows equivalent,
    followed by the mmshare data/reagents directory, followed by the current
    directory ('.').

    :rtype: list of str
    """
    envvar_path = os.environ.get(LIBPATH_ENVVAR)
    libpath = envvar_path.split(os.pathsep) if envvar_path else []

    userdata = fileutils.get_directory_path(fileutils.LOCAL_APPDATA)
    libpath.append(os.path.join(userdata, 'reagents'))
    libpath.append(get_mmshare_reagent_data_dir())
    # The current directory has the lowest priority.
    libpath.append('.')
    return libpath


def debug_mols(message, mols, separator=' + '):
    """
    Print a debug message (if the logger level indicates it), appending
    a list of SMILES representation of the molecules.

    :type message: str
    :type mols: iterable of Mol
    :type separator: str
    """
    # Skip the SMILES generation entirely unless debugging is enabled.
    if logger.getEffectiveLevel() > log.DEBUG:
        return
    smiles = separator.join(Chem.MolToSmiles(m) for m in mols)
    logger.debug(message + smiles)


def is_reagent_file(filename):
    """
    Check whether a given file is usable as a reagent file. A reagent file must
    be a structure file and the structures need to have a title. In the case of
    a csv file, we look for columns named 's_m_title' or 'NAME'.

    Only the first structure is examined. Any error while reading the file
    means that the file is not a reagent file.

    :param filename: filename
    :type filename: str

    :return: True if the file is a reagent file
    :rtype: bool
    """
    try:
        for mol in molio.get_mol_reader(filename, skip_bad=False):
            # We only check the first structure. It must have a non-empty title.
            return bool(get_title(mol))
    except Exception:
        # Unreadable or invalid files are simply not reagent files.
        return False
    # The file has no structures.
    return False


def is_reagent_filename(filename):
    """
    Check whether a filename has the extension for a possible reagent file
    format. Unlike is_reagent_file, it does not open the file to try to confirm
    whether there are valid structures in it.

    :param filename: filename
    :type filename: str

    :return: True if the filename may be a reagent file
    :rtype: bool
    """
    if fileutils.get_structure_file_format(filename):
        return True
    return bool(molio.is_csvgz(filename) or molio.is_pfx(filename))


def get_title(mol, prop='s_m_title'):
    """
    Return the title of a molecule. Looks for the property with the name
    specified by 'prop' but falls back to the aliases "NAME" and "_Name",
    in that order.

    :param mol: input molecule
    :type mol: Chem.Mol

    :param prop: title property name
    :type prop: str

    :return: molecule title, or None if unspecified
    :rtype: str or NoneType
    """
    for candidate in (prop, 'NAME', '_Name'):
        try:
            return mol.GetProp(candidate)
        except KeyError:
            # Property not set on this molecule; try the next alias.
            continue
    return None


def get_reactions(filename=None, reagent_classes_file=None, max_tier=None):
    """
    Convenience function to read the reactions and the reagent classes file with
    a single call, and optionally filtering the reactions by tier.

    :param filename: reactions file (JSON formatted). If None, read the standard
                     file.
    :type filename: str

    :param reagent_classes_file: reagent classes file (JSON formatted). If
                     None, read the standard file.
    :type reagent_classes_file: str

    :param max_tier: maximum reaction tier (None means all reactions).
                     Reactions without a tier are dropped when a maximum tier
                     is given.
    :type max_tier: int or NoneType

    :return: dictionary of reactions
    :rtype: {str: Reaction}
    """
    reagent_classes = read_reagent_classes_file(reagent_classes_file)
    reactions_dict = read_reactions_file(filename,
                                         reagents_dict=reagent_classes)
    if max_tier is None:
        return reactions_dict
    # Filter in place rather than building a new dict, because
    # reactions_dict may actually be a ReactionDict instance containing
    # properties we want to preserve.
    kept = {
        name: rxn
        for name, rxn in reactions_dict.items()
        if rxn.tier is not None and rxn.tier <= max_tier
    }
    reactions_dict.clear()
    reactions_dict.update(kept)
    return reactions_dict


def _has_wildcards(mol):
    """
    Report whether a molecule contains wildcard atoms.

    :param mol: input molecule
    :type mol: Chem.Mol

    :return: True if molecule has any wildcard atoms (e.g., ``*``, ``[*]``,
        or ``[*:42]``).
    :rtype: bool
    """
    # This will break if RDKit changes its DescribeQuery notation, but it is
    # covered by a unit test, and beats trying to parse SMARTS.
    wildcard_description = 'AtomNull\n'
    for atom in mol.GetAtoms():
        if atom.DescribeQuery() == wildcard_description:
            return True
    return False