Source code for schrodinger.application.scaffold_enumeration.atomlist

'''
Implements "atom list" enumeration (see ENUM-246).
'''

import collections
import re

import rdkit.Chem

from schrodinger.utils import log

from . import common

# Module-level logger; this file uses it to warn about element symbols
# that cannot be mapped to atomic numbers.
logger = log.get_output_logger(__name__)

#------------------------------------------------------------------------------#

# Record describing one enumerable "atom list" site within a molecule.
AtomListInfo = collections.namedtuple(
    'AtomListInfo',
    (
        'atom',      # index of the atom whose atomic number varies
        'elements',  # atomic numbers to enumerate at that atom
    ),
)  # yapf: disable

# Name of the RDKit atom property that holds the MRV query text from which
# "atom lists" are read and to which they are written; formed by prefixing
# 'mrvQueryProps' with `common.CML_PROP_PREFIX`.
cml_mrvQueryProps = common.CML_PROP_PREFIX + 'mrvQueryProps'

#------------------------------------------------------------------------------#


def _validate_elements(elements):
    '''
    Maps chemical element symbols onto atomic numbers, dropping
    duplicates. Symbols rejected by the RDKit periodic table are
    reported via a warning and left out of the result.

    :param elements: Element symbols.
    :type elements: list(str)

    :return: Distinct atomic numbers in ascending order.
    :rtype: list(int)
    '''

    periodic_table = rdkit.Chem.GetPeriodicTable()
    seen = set()

    for symbol in elements:
        try:
            number = periodic_table.GetAtomicNumber(symbol)
        except RuntimeError:
            # the lookup failed for this symbol: warn and skip it
            logger.warning('could not figure out atomic number for "%s"',
                           symbol)
        else:
            seen.add(number)

    ordered = list(seen)
    ordered.sort()
    return ordered


#------------------------------------------------------------------------------#


def _parse_mrv_atom_list(text):
    '''
    Parses "atom list" data from MRV query text. For example, for
    elements C, O and N, the query reads "L,C,O,N:".

    :param text: Text to parse.
    :type text: str

    :return: List of chemical elements.
    :rtype: list(str)
    '''

    elements = []

    for m in re.findall(r'L((?:,[^:,L]+)+):', text):
        for e in m.split(','):
            if e:
                elements.append(e)

    return elements


#------------------------------------------------------------------------------#


def _collect_atom_lists(mol):
    '''
    Gathers the "atom list" sites (ENUM-246) present in `mol`.

    An atom qualifies when its atom-list elements resolve to at least
    one atomic number.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :return: One `AtomListInfo` per qualifying atom, in atom order.
    :rtype: list(AtomListInfo)
    '''

    found = []

    for candidate in mol.GetAtoms():
        numbers = _validate_elements(get_atom_elements(candidate))
        if not numbers:
            # no usable atom list on this atom
            continue
        found.append(AtomListInfo(atom=candidate.GetIdx(), elements=numbers))

    return found


#------------------------------------------------------------------------------#


def get_atom_elements(atom):
    '''
    Reads the atom list stored on `atom`.

    :param atom: RDKit atom.
    :type atom: `rdkit.Chem.Atom`

    :return: Elements parsed from the atom's MRV query property; empty
             when the atom does not carry that property.
    :rtype: list(str)
    '''

    try:
        query_text = atom.GetProp(cml_mrvQueryProps)
    except KeyError:
        # property is absent: the atom has no atom list
        return []
    else:
        return _parse_mrv_atom_list(query_text)
#------------------------------------------------------------------------------#
def set_atom_elements(atom, elements):
    '''
    Makes `atom` into an atom list (in ENUM-246 sense).

    An "L,<e1>,<e2>,...:" entry is appended to the MRV query text
    already stored on the atom (or to an empty string if there is none).

    :param atom: RDKit atom.
    :type atom: `rdkit.Chem.Atom`

    :param elements: Iterable over elements.
    :type elements: iterable over str

    :raises RuntimeError: If `elements` is `None` or yields no items.
    '''

    # Materialize before testing for emptiness: generators and other
    # iterators are always truthy, so checking the raw argument would let
    # an empty one through and write a meaningless "L:" entry.
    symbols = [] if elements is None else list(elements)
    if not symbols:
        raise RuntimeError("no elements")

    try:
        orig = atom.GetProp(cml_mrvQueryProps)
    except KeyError:
        # no query text on the atom yet
        orig = ''

    atom.SetProp(cml_mrvQueryProps, orig + ','.join(['L'] + symbols) + ':')
#------------------------------------------------------------------------------#
class AtomListEnumerable(common.EnumerableMixin):
    '''
    Enumerable view of a molecule whose "atom list" atoms may each take
    one of several atomic numbers.
    '''

    def __init__(self, mol):
        '''
        :param mol: RDKit molecule.
        :type mol: ROMol
        '''

        self.atomlists = _collect_atom_lists(mol)
        self.mol = mol

    def getExtents(self):
        '''
        :return: Number of candidate elements for each atom list, in the
                 order of `self.atomlists`.
        :rtype: list(int)
        '''

        return [len(info.elements) for info in self.atomlists]

    def getRealization(self, idx):
        '''
        :param idx: "Index" of a realization: for each atom list, the
                    position of the element to use.
        :type idx: iterable over int

        :return: RDKit molecule without enumerable "atom lists".
        :rtype: rdkit.Chem.Mol
        '''

        if not self.atomlists:
            # nothing to substitute: the stored molecule is handed back
            # as is (not a copy)
            return self.mol

        realized = rdkit.Chem.Mol(self.mol)
        for choice, info in zip(idx, self.atomlists):
            target = realized.GetAtomWithIdx(info.atom)
            # drop the query text, then fix the element
            target.ClearProp(cml_mrvQueryProps)
            target.SetAtomicNum(info.elements[choice])

        return realized
#------------------------------------------------------------------------------#