Source code for schrodinger.application.scaffold_enumeration.layered

import itertools

import more_itertools

from . import atomlist
from . import linknode
from . import markush
from . import posvarbond


def _get_linknodes_and_pvbonds(mol, linknodes=None, pvbonds=None):
    """
    Gathers the "repeating units" and "position variant bonds"
    specifications and makes sure that they do not overlap.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :param linknodes: Link nodes (repeating units); collected from `mol`
        when `None`.
    :type linknodes: list(linknode.LinknodeSgroup)

    :param pvbonds: Position variant bonds; collected from `mol`
        when `None`.
    :type pvbonds: list(posvarbond.MulticenterSgroup)

    :return: Lists of "repeating units" and "position variant bonds".
    :rtype: (list(linknode.LinknodeSgroup), list(posvarbond.MulticenterSgroup))

    :raises ValueError: If a position variant bond shares atoms with a
        repeating unit, or has its center inside one.
    """

    # specifications collected here are to be validated later
    if linknodes is None:
        linknodes = linknode._collect_linknodes(mol)
    if pvbonds is None:
        pvbonds = posvarbond._collect_posvarbonds(mol)

    # every (repeating unit, position variant bond) pair must be disjoint
    overlapping = any(
        unit.atoms & set(bond.atoms) or bond.center in unit.atoms
        for unit in linknodes
        for bond in pvbonds)
    if overlapping:
        raise ValueError('position variable bonds within repeating units')

    return linknodes, pvbonds


def _get_random_flat_realization(mol, prng, linknodes, pvbonds):
    """
    Draws one random realization of the molecule using the flavors of
    enumeration that cannot be nested, applied in this order:

    1. repeating units,
    2. position variant bonds,
    3. atom lists.

    "Repeating units" must be expanded prior to "position variant bonds"
    because enumeration of the latter involves atom deletions.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :param prng: MT19937 pseudorandom number generator from numpy.
    :type prng: `numpy.random.RandomState`

    :param linknodes: Link nodes (repeating units), pre-validated.
    :type linknodes: list(linknode.LinknodeSgroup)

    :param pvbonds: Position variant bonds, pre-validated.
    :type pvbonds: list(posvarbond.MulticenterSgroup)

    :return: Molecule
    :rtype: rdkit.Chem.Mol
    """

    # step 1: repeating units
    ln_enumerable = linknode.LinknodeEnumerable(mol, linknodes=linknodes)
    expanded = ln_enumerable.getRandomRealization(prng)

    # step 2: position variant bonds (on the already expanded molecule)
    pvb_enumerable = posvarbond.PosVarBondEnumerable(expanded,
                                                     pvbonds=pvbonds)
    resolved = pvb_enumerable.getRandomRealization(prng)

    # step 3: atom lists
    al_enumerable = atomlist.AtomListEnumerable(resolved)
    return al_enumerable.getRandomRealization(prng)


def flat_enumerator(mol, prng=None, linknodes=None, pvbonds=None):
    """
    Returns iterator over realizations obtained via not nestable
    flavors of enumeration:

    * repeating units,
    * position variant bonds, and
    * atom lists

    (in that order: "repeating units" must be expanded prior to "position
    variant bonds" because enumeration of the latter involves atom deletions).

    With a `prng` the returned iterator produces random realizations on
    demand; without one it walks through the realizations sequentially.

    :param mol: Unadulterated molecule.
    :type mol: rdkit.Chem.Mol

    :param prng: MT19937 pseudorandom number generator from numpy or None.
    :type prng: Optional[numpy.random.RandomState]

    :param linknodes: Link nodes (repeating units); collected from `mol`
        when `None`.
    :type linknodes: list(linknode.LinknodeSgroup)

    :param pvbonds: Position variant bonds; collected from `mol`
        when `None`.
    :type pvbonds: list(posvarbond.MulticenterSgroup)

    :return: Iterator over molecules.
    :rtype: iterator over rdkit.Chem.Mol

    :raises ValueError: If position variant bonds overlap with
        repeating units.
    """

    # FIXME: can atom indices change in RDKit::ROMol(const ROMol&)?

    # Collect (if not supplied) and cross-validate the specifications once,
    # then hand these very lists to the enumerators below. Previously the
    # outcome was discarded and the raw arguments (possibly `None`) were
    # passed on instead.
    linknodes, pvbonds = _get_linknodes_and_pvbonds(mol,
                                                    linknodes=linknodes,
                                                    pvbonds=pvbonds)

    if prng:  # random

        def get_realization():
            return _get_random_flat_realization(mol,
                                                prng,
                                                linknodes=linknodes,
                                                pvbonds=pvbonds)

        return more_itertools.repeatfunc(get_realization)

    # sequential: chain the three enumeration stages lazily
    ln_iter = linknode.LinknodeEnumerable(mol, linknodes=linknodes).getIter()

    iterable_over_pvb_iters = (posvarbond.PosVarBondEnumerable(
        m, pvbonds=pvbonds).getIter() for m in ln_iter)

    pvb_iter = itertools.chain.from_iterable(iterable_over_pvb_iters)

    iterable_over_atomlist_iters = (
        atomlist.AtomListEnumerable(m).getIter() for m in pvb_iter)

    return itertools.chain.from_iterable(iterable_over_atomlist_iters)
def flat_list_enumerator(molecules, prng=None):
    """
    Applies "flat" enumeration to each of the `molecules` and returns
    an iterator over the resulting structures.

    :param molecules: List of (kinky) molecules.
    :type molecules: list[rdkit.Chem.Mol]

    :param prng: MT19937 pseudorandom number generator from numpy or None.
    :type prng: Optional[numpy.random.RandomState]

    :return: Iterator over molecules.
    :rtype: iterator over rdkit.Chem.Mol
    """

    if not prng:
        # sequential: realizations of the first molecule, then of the
        # second one, and so on
        per_molecule = (
            flat_enumerator(entry, prng=None) for entry in molecules)
        return itertools.chain.from_iterable(per_molecule)

    # random: pick a molecule, then take one of its random realizations
    def draw():
        picked = prng.choice(molecules)
        return next(flat_enumerator(picked, prng=prng))

    return more_itertools.repeatfunc(draw)
def place_rgroups(mol, todo, rgroups, prng, homo):
    """
    Generator that yields realizations of `mol` with (some) atoms
    replaced by R-groups from `todo` (and, potentially, `rgroups`).

    :param mol: Scaffold molecule.
    :type mol: rdkit.Chem.Mol

    :param todo: List of tuples that pair atom indices (in `mol`) with the
        R-group to be placed at each of those atoms.
    :type todo: list[tuple[list[int], rdkit.Chem.Mol]]

    :param rgroups: Dictionary that maps R-group numbers (positive integers)
        onto list of molecules. Assumed to contain "original" R-groups
        that may be necessary if some of the R-groups in `todo` include
        R-group references.
    :type rgroups: dict[int, list[rdkit.Chem.Mol]]

    :param prng: MT19937 pseudorandom number generator from numpy or None.
    :type prng: Optional[numpy.random.RandomState]

    :param homo: IDs (positive integers) of the homo R-groups.
    :type homo: Optional[set[int]]
    """

    todo_iters = []  # in `todo` order
    for _, rgmol in todo:
        if markush.get_rlabels_set(rgmol) & rgroups.keys():
            # `rgmol` references nested R-groups that need to be enumerated
            todo_iters.append(enumerate_rgroups(rgmol, rgroups, prng, homo))
        else:
            todo_iters.append([rgmol])

    if not prng:
        todo_iters_product = itertools.product(*todo_iters)
    elif todo_iters:
        todo_iters_product = zip(*todo_iters)
    else:
        # `zip()` without iterables yields nothing whereas
        # `itertools.product()` yields one empty tuple: keep the random
        # mode in line with the sequential one when `todo` is empty
        todo_iters_product = [()]

    for rgs in todo_iters_product:
        # expand (indices, rgroup) into one (index, rgroup) pair per atom
        atoms_and_rgroups = list(
            itertools.chain.from_iterable(
                zip(indices, itertools.repeat(rg))
                for ((indices, _), rg) in zip(todo, rgs)))
        yield from markush.place_rgroups(mol, atoms_and_rgroups)


def enumerate_rgroups(mol, rgroups, prng=None, homo=None):
    """
    Enumerates R-groups in `mol` using R-groups from `rgroups`
    (generator that yields the resulting molecules).

    In sequential mode (no `prng`) every combination of R-group
    realizations is visited. In random mode the combinations are drawn
    from iterators that repeat indefinitely, so the caller is expected
    to take only as many outcomes as needed.

    :param mol: Scaffold molecule.
    :type mol: rdkit.Chem.Mol

    :param rgroups: Dictionary that maps R-group numbers (positive integers)
        onto list of molecules.
    :type rgroups: dict[int, list[rdkit.Chem.Mol]]

    :param prng: MT19937 pseudorandom number generator from numpy or None.
    :type prng: Optional[numpy.random.RandomState]

    :param homo: IDs (positive integers) of the homo R-groups.
    :type homo: Optional[set[int]]
    """

    # resolve/flatten R-groups

    flat_rgroup_iters = []
    flat_rgroup_atoms = []

    for rlabel, atoms in markush.get_rlabels_map(mol).items():
        try:
            replacements = rgroups[rlabel]
        except KeyError:
            # no replacements on offer for this label
            continue

        viable_replacements = []
        for rgmol in replacements:
            rgmol_rlabels = markush.get_rlabels_set(rgmol)
            # example: R2 in R2-C should not be replaced with
            # anything that references R1 or R2; may reference R3
            if not rgmol_rlabels or rlabel < min(rgmol_rlabels):
                viable_replacements.append(rgmol)

        if not viable_replacements:
            continue

        if homo and rlabel in homo:
            # all `atoms` to be replaced with the same R-group realization
            flat_rgroup_atoms.append(atoms)
            flat_rgroup_iters.append(
                flat_list_enumerator(viable_replacements, prng))
        else:
            # enumerate R-group realizations at different `atoms`
            for idx in atoms:
                flat_rgroup_atoms.append([idx])
                flat_rgroup_iters.append(
                    flat_list_enumerator(viable_replacements, prng))

    # loop over R-group combinations

    if not prng:
        flat_rgroups_iters_product = itertools.product(*flat_rgroup_iters)
    elif flat_rgroup_iters:
        flat_rgroups_iters_product = zip(*flat_rgroup_iters)
    else:
        # Nothing to replace: `zip()` without iterables would yield nothing
        # (unlike `itertools.product()`), leaving the random mode without
        # any outcome. Keep producing the "empty" combination instead, like
        # the never-ending iterators zipped in the non-empty case.
        flat_rgroups_iters_product = itertools.repeat(())

    for rgs in flat_rgroups_iters_product:
        atoms_and_rgroups = list(zip(flat_rgroup_atoms, rgs))
        yield from place_rgroups(mol,
                                 atoms_and_rgroups,
                                 rgroups=rgroups,
                                 prng=prng,
                                 homo=homo)
def collection(mol, rgroups=None, prng=None, homo=None):
    """
    Top-level API: generator that yields molecules obtained from `mol`.

    Each "flat" realization of `mol` gets its R-groups enumerated: in
    random mode (with `prng`) a single outcome is taken per flat
    realization, in sequential mode all of them are yielded.

    :param mol: Scaffold molecule.
    :type mol: rdkit.Chem.Mol

    :param rgroups: Dictionary that maps R-group numbers (positive integers)
        onto list of molecules.
    :type rgroups: dict[int, list[rdkit.Chem.Mol]]

    :param prng: MT19937 pseudorandom number generator from numpy or None.
    :type prng: Optional[numpy.random.RandomState]

    :param homo: IDs (positive integers) of the "homo" R-groups. Regular
        R-groups that share the same label get enumerated independently
        (e.g., four outcomes are expected for ``R1-CO-R1`` with
        ``R1 = [*Cl, *F]``). OTOH, homo R-groups with the same label
        end up with the same realization (within a nesting level), so
        that only two outcomes would be obtained in the example above.
    :type homo: Optional[set[int]]
    """

    rgroup_map = dict() if rgroups is None else rgroups

    for flat_mol in flat_enumerator(mol, prng):
        realizations = enumerate_rgroups(flat_mol,
                                         rgroups=rgroup_map,
                                         prng=prng,
                                         homo=homo)
        if prng:
            # random
            yield next(realizations)
        else:
            # sequential
            yield from realizations