Source code for schrodinger.livedesign.setup_reaction

"""
Users commonly sketch reactions that are not directly useable in
reaction enumeration. This module includes tools that are needed to convert
a reaction to a suitable format for use in reaction enumeration.

Here's how LiveDesign calls the reaction enumeration tool:
    seurat/TaskEngineTasks/python/schrodinger_enumerate/schrodinger_enumerate.py

This requires jaguar to be available to work.
"""

import collections
import itertools
from typing import List

from rdkit import Chem
from rdkit.Chem import rdqueries
from rdkit.Chem.rdChemReactions import ChemicalReaction

from schrodinger import structure
from schrodinger.livedesign import convert
from schrodinger.livedesign import preprocessor
from schrodinger.structure._structure import NO_SPECIFIC_ISOTOPE
from schrodinger.thirdparty import rdkit_adapter

try:
    from schrodinger.application.jaguar.packages import reaction_mapping
except ImportError as e:
    # in build without jaguar
    reaction_mapping = None

RDK_LABEL_PROP = 'i_tmp_rdkmol'
ST_MAP_NUMBER_PROP = 'i_rdkit_molAtomMapNumber'


def _setup_rgroup_rxn_map_nums(mol):
    """
    Turn indexed R groups into dummy atoms with atom map numbers > 100

    Reactions are commonly drawn with mapped R groups, and nothing else mapped. The R
    groups should be turned into star (wildcard) atoms with the correct atom mapping.

    to be tested:
    - collisions if some existing atoms are mapped
    """
    for a in mol.GetAtoms():
        try:
            rlabel = a.GetUnsignedProp(preprocessor.MOL_PROP_R_LABEL)
        except KeyError:
            continue
        a.SetAtomMapNum(rlabel + 100)
        a.ClearProp(preprocessor.MOL_PROP_R_LABEL)
        a.ClearProp(preprocessor.ATOM_PROP_DUMMY_LABEL)


def _lock_attachments(mol: Chem.Mol) -> Chem.Mol:
    """
    If there are R groups, lock each atom to have only the current number
    of non-hydrogen neighbors.

    R groups may be either H or heavy, so atoms neighboring R groups
    have a range of valid values.

    for example:
        CCCR -> [C&d1][C&d2][C&d{1-2}]R

    """
    for a in mol.GetAtoms():
        if _is_rgroup_or_star(a):
            continue
        if a.GetAtomicNum() == 1:
            # don't need to lock hydrogens
            continue
        non_h_degree = 0
        star_neighbors = 0
        for n in a.GetNeighbors():
            if _is_rgroup_or_star(n):
                star_neighbors += 1
            elif n.GetAtomicNum() != 1:
                non_h_degree += 1
        if star_neighbors != 0:
            low = non_h_degree
            high = non_h_degree + star_neighbors
            expr = f'[d{{{low}-{high}}}]'
        else:
            expr = f'[d{non_h_degree}]'
        if a.HasQuery():
            a.ExpandQuery(Chem.AtomFromSmarts(expr))
        else:
            qa = rdqueries.AtomNumEqualsQueryAtom(a.GetAtomicNum())
            qa.ExpandQuery(Chem.AtomFromSmarts(expr))
            mol.ReplaceAtom(a.GetIdx(), qa)


def _is_rgroup_or_star(atom: Chem.Atom) -> bool:
    if atom.HasProp(preprocessor.MOL_PROP_R_LABEL):
        return True
    if preprocessor.is_wildcard(atom):
        return True
    return False


def _has_rgroup_or_star(mol: Chem.Mol) -> bool:
    return any(_is_rgroup_or_star(a) for a in mol.GetAtoms())


def _get_rdk_atom(st_atom, rdk_mols):
    rdk_mol_idx = st_atom.structure.property[RDK_LABEL_PROP]
    rdk_atom_idx = st_atom.property[rdkit_adapter.adapter.RDK_INDEX]
    return rdk_mols[rdk_mol_idx].GetAtomWithIdx(rdk_atom_idx)


def _apply_rxn_mapping(reactants, products, rxn_mapping):
    """
    Add the map indices to the atoms in the reaction.
    """

    map_num = 0
    for reactant_atom, product_atom in rxn_mapping.items():
        if reactant_atom is None or product_atom is None:
            # atom appears/disappears in the reaction
            continue

        try:
            reactant_rdk_atom = _get_rdk_atom(reactant_atom, reactants)
            product_rdk_atom = _get_rdk_atom(product_atom, products)
        except KeyError:
            # Atoms can be added/removed in the reaction, and won't be mapped.
            # Just skip them.
            continue

        reactant_map_number = reactant_rdk_atom.GetAtomMapNum()
        if reactant_map_number >= 100:
            if reactant_map_number == product_rdk_atom.GetAtomMapNum():
                # Leave R Groups alone (we already mapped them).
                continue
            elif product_rdk_atom.GetAtomMapNum() < 100:
                raise RuntimeError('R group remapped to non- R group atom')
            else:
                raise RuntimeError('R group remapped different R group')
        elif product_rdk_atom.GetAtomMapNum() >= 100:
            raise RuntimeError('R group remapped to non- R group atom')

        map_num += 1
        reactant_rdk_atom.SetAtomMapNum(map_num)
        product_rdk_atom.SetAtomMapNum(map_num)


def _has_query_charge(atom):
    """Does the atom have a charge or a charge query?"""
    return bool(atom.GetFormalCharge() or 'Charge' in atom.DescribeQuery())


def _replace_with_0charge(atom):
    """Replace an "implicit" charge 0 with a query for exactly 0 charge."""
    if atom.HasQuery():
        atom.ExpandQuery(
            rdqueries.FormalChargeEqualsQueryAtom(atom.GetFormalCharge()))
    else:
        qa = rdqueries.AtomNumEqualsQueryAtom(atom.GetAtomicNum())
        qa.ExpandQuery(
            rdqueries.FormalChargeEqualsQueryAtom(atom.GetFormalCharge()))
        mol = atom.GetOwningMol()
        idx = atom.GetIdx()
        mol.ReplaceAtom(idx, qa)


def _fix_charge_modifing_atoms(reacting_atoms: list):
    """
    Change [N+:1]>>[N:1] to [N+:1]>>[N+0:1]

    If charge isn't specified in a reaction product, then the charge is
    propagated from the reactant - so the input reaction would have done
    nothing! Our sketcher doesn't allow drawing an "explicit 0 charge",
    so we need to guess if that's what the user meant.
    """
    for reactant_atom, product_atom in reacting_atoms:
        if _has_query_charge(reactant_atom) ^ _has_query_charge(product_atom):
            if not _has_query_charge(reactant_atom):
                _replace_with_0charge(reactant_atom)
            if not _has_query_charge(product_atom):
                _replace_with_0charge(product_atom)


def _fix_disappearing_hydrogens(reacting_atoms):
    """
    Add explicit graph hydrogens to reactant atoms that need to have their
    hydrogens removed in a reaction
    """
    needs_h = collections.defaultdict(list)
    for reactant_atom, product_atom in reacting_atoms:
        product_atom.GetOwningMol().UpdatePropertyCache(strict=False)
        reactant_atom.GetOwningMol().UpdatePropertyCache(strict=False)

        reactant_hs = reactant_atom.GetTotalNumHs(includeNeighbors=True)
        product_hs = product_atom.GetTotalNumHs(includeNeighbors=True)

        if reactant_hs <= product_hs:
            continue

        diff = reactant_hs - product_hs
        mol = reactant_atom.GetOwningMol()
        needs_h[mol].append((reactant_atom.GetIdx(), diff))

    for mol, updates in needs_h.items():
        with mol:
            for atom_index, num_hs in updates:
                for _ in range(num_hs):
                    new_h = Chem.Atom(1)
                    new_h_idx = mol.AddAtom(new_h)
                    mol.AddBond(new_h_idx, atom_index, Chem.BondType.SINGLE)


def _fix_nitros(reactants: List[Chem.RWMol]):
    """
    Generalize nitro expressions in reactants

    A general nitro matching expression is: [#NX3]([OX1])[OX1]

    """

    # Can't use reactions to do this replacement, because reactions are
    # meant to generate real molecules, not queries. Otherwise, we'd do:
    # [NX3](=O)=O>>[#NX3]([OX1])[OX1]
    # [N+X3](=O)[O-]>>[#NX3]([OX1])[OX1]

    nitro1 = Chem.MolFromSmarts('[NX3](=O)=O')
    nitro2 = Chem.MolFromSmarts('[N+X3](=O)[O-]')

    new_n = Chem.AtomFromSmarts('[NX3]')
    new_o = Chem.AtomFromSmarts('[OX1]')
    new_bond = Chem.BondFromSmarts("-,=")

    for r in reactants:
        r.UpdatePropertyCache(strict=False)
        for (n, o1, o2) in itertools.chain(r.GetSubstructMatches(nitro1),
                                           r.GetSubstructMatches(nitro2)):
            r.ReplaceAtom(n, new_n, preserveProps=True)
            r.ReplaceAtom(o1, new_o, preserveProps=True)
            r.ReplaceAtom(o2, new_o, preserveProps=True)

            old_bond1 = r.GetBondBetweenAtoms(n, o1).GetIdx()
            r.ReplaceBond(old_bond1, new_bond, preserveProps=True)
            old_bond2 = r.GetBondBetweenAtoms(n, o2).GetIdx()
            r.ReplaceBond(old_bond2, new_bond, preserveProps=True)
        r.UpdatePropertyCache(strict=False)


def _is_removable_hydrogen(atom):
    return atom.atomic_number == 1 and atom.isotope == NO_SPECIFIC_ISOTOPE and ST_MAP_NUMBER_PROP not in atom.property


def _remove_unnecessary_hs(st):
    """
    Removes Hydrogens that are not necessary in reaction mapping to prevent
    unnecessary computation when mapping reaction with jaguar.

    :param: The structure to be mapped in a reaction.

    :return: The input Structure with all removable hydrogens deleted from it.
    """
    remove_indices = [at.index for at in st.atom if _is_removable_hydrogen(at)]
    st.deleteAtoms(remove_indices)


[docs]def map_reaction(reactants, products): """ Call Jaguar's reaction mapper. :param reactants: list of schrodinger Structures :param products: list of schrodinger Structures :return: list of pairs of atoms that correspond """ # sets this property to tell the mapper which atoms match. for a in (a for st in (reactants + products) for a in st.atom): try: map_number = a.property[ST_MAP_NUMBER_PROP] a.property[reaction_mapping.AUTOTS_ATOM_CLASS] = map_number except KeyError: pass return reaction_mapping.get_rp_map(reactants, products)
def _can_map_additional_atoms(rxn: ChemicalReaction): """ Could we possibly map more atoms? In order to map additional atoms, there need to be unmapped atoms in both the reactant and product. """ reactant_atoms = (a for m in rxn.GetReactants() for a in m.GetAtoms()) unmapped_reactant = any(a.GetAtomMapNum() == 0 for a in reactant_atoms) product_atoms = (a for m in rxn.GetProducts() for a in m.GetAtoms()) unmapped_product = any(a.GetAtomMapNum() == 0 for a in product_atoms) return unmapped_reactant and unmapped_product def _get_reacting_atoms(reactants, products): """Use the map number to get the atom pairs that are reacting.""" def map_to_a(mols, name): atoms = ((a.GetAtomMapNum(), a) for m in mols for a in m.GetAtoms()) # every atom has a map number, map number 0 is unmapped: atoms = [(number, a) for number, a in atoms if number != 0] map_number_count = len(atoms) atoms = dict(atoms) if map_number_count != len(atoms): raise ValueError(f'Duplicate map numbers in {name}') return atoms reactant_atoms = map_to_a(reactants, 'reactants') product_atoms = map_to_a(products, 'products') if set(reactant_atoms) != set(product_atoms): raise ValueError('Reactants and products have different atom maps') return [(reactant_atoms[k], product_atoms[k]) for k in reactant_atoms] def _st_from_rdkit_query(mol: Chem.Mol) -> structure.Structure: """ Query mols may be fragments that are not kekulizable. We still need a Structure to use the mapper, but the precise bond orders don't matter so much for the mapper, so it's ok that this is fake. """ mol = Chem.RWMol(mol) Chem.rdmolops.KekulizeIfPossible(mol) for b in mol.GetBonds(): if b.GetBondType() == Chem.BondType.AROMATIC: b.SetBondType(Chem.BondType.SINGLE) return rdkit_adapter.from_rdkit(mol, conformer=-1)
[docs]def setup_reaction(rxn: ChemicalReaction) -> ChemicalReaction: """ Converts display reaction into a reaction useable by core suite's enumeration tool. Users commonly sketch reactions without atom mappings, and that assume certain things. Reaction enumeration requires that: - In reactants, hydrogens attached to aromatic nitrogens must be explicit - Reactants be aromatized rather than kekulized - Each R-group must be converted to '*' (any atom) with a mapping number >100 - If there are R groups, that's the only place the fragments should be allowed to connect - Atoms must be mapped from the reactant to the product. """ each_has_rlabel = [_has_rgroup_or_star(m) for m in rxn.GetReactants()] if _can_map_additional_atoms(rxn): reactants = [] st_reactants = [] for i, m in enumerate(rxn.GetReactants()): _setup_rgroup_rxn_map_nums(m) m = convert.add_hs_to_aromatic_nitrogen(m) m.UpdatePropertyCache(False) Chem.rdCoordGen.AddCoords(m) reactants.append(m) # Jaguar reaction mapper requires coordinates to be present st = _st_from_rdkit_query(m) st.property[RDK_LABEL_PROP] = i _remove_unnecessary_hs(st) st_reactants.append(st) products = [] st_products = [] for i, m in enumerate(rxn.GetProducts()): _setup_rgroup_rxn_map_nums(m) # m = convert.add_hs_to_aromatic_nitrogen(m) m.UpdatePropertyCache(False) Chem.rdCoordGen.AddCoords(m) products.append(m) # Jaguar reaction mapper requires coordinates to be present st = _st_from_rdkit_query(m) st.property[RDK_LABEL_PROP] = i _remove_unnecessary_hs(st) st_products.append(st) # This happens at the end in case atoms are added/removed from the reaction during processing # returns 0 or 1 mappings rxn_mappings = map_reaction(st_reactants, st_products) if not rxn_mappings: msg = "Could not map atoms between reactants and products" raise RuntimeError(msg) _apply_rxn_mapping(reactants, products, rxn_mappings[0]) else: reactants = rxn.GetReactants() products = rxn.GetProducts() # we may edit connectivity, so swap to RWMol reactants = [Chem.RWMol(r) for r in reactants] reacting_atoms = _get_reacting_atoms(reactants, products) _fix_charge_modifing_atoms(reacting_atoms) _fix_disappearing_hydrogens(reacting_atoms) _fix_nitros(reactants) # Aromatic reactants are required for reaction enumeration to # work correctly, but cause problems for atom mapping. params = Chem.AdjustQueryParameters.NoAdjustments() params.aromatizeIfPossible = True rxn_mapped = ChemicalReaction() for has_rlabel, m in zip(each_has_rlabel, reactants): m = Chem.AdjustQueryProperties(m, params) m.UpdatePropertyCache(False) if has_rlabel: _lock_attachments(m) rxn_mapped.AddReactantTemplate(m) for m in products: rxn_mapped.AddProductTemplate(m) return rxn_mapped