Source code for schrodinger.application.scaffold_enumeration.common

import itertools
import json

# Common prefix of the property names defined below.
CML_PROP_PREFIX = 'cml_'

# Name of the property holding an atom/bond ID, and name of the
# molecule-level property holding the JSON-serialized "S-groups".
CML_ID_PROP = f'{CML_PROP_PREFIX}id'
CML_SGROUPS_PROP = f'{CML_PROP_PREFIX}sgroups'

# Property names for the R-group reference and the attachment order.
CML_RGROUP_REF_PROP = f'{CML_PROP_PREFIX}rgroupRef'
CML_ATTACHMENT_ORDER_PROP = f'{CML_PROP_PREFIX}attachmentOrder'

#------------------------------------------------------------------------------#


def get_atom_id(atom):
    '''
    Returns the ID of `atom`: the value of its CML ID property when that
    property is set, otherwise a fallback of the form `a<N>` where `N` is
    the atom index plus one.

    :param atom: Atom.
    :type atom: rdkit.Chem.Atom

    :return: Atom ID.
    :rtype: str
    '''
    # No explicit ID stored on the atom: derive one from its index.
    if not atom.HasProp(CML_ID_PROP):
        return f'a{atom.GetIdx() + 1}'

    return atom.GetProp(CML_ID_PROP)
#------------------------------------------------------------------------------#
def get_atom_id_map(mol):
    '''
    Builds a dictionary that maps the "atom ID" of every atom in `mol`
    (as returned by `get_atom_id()`) onto the index of that atom.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :return: Map between atom IDs and their indices in `mol`.
    :rtype: dict(str, int)

    :raises RuntimeError: If two or more atoms share the same ID.
    '''
    index_by_id = {}

    for atom in mol.GetAtoms():
        atom_id = get_atom_id(atom)

        # IDs must be unique, otherwise the mapping would be ambiguous.
        if atom_id in index_by_id:
            raise RuntimeError(f'same ID ({atom_id}) shared by several atoms')

        index_by_id[atom_id] = atom.GetIdx()

    return index_by_id
#------------------------------------------------------------------------------#
def get_bond_id(bond):
    '''
    Returns the ID of `bond`: the value of its CML ID property when that
    property is set, otherwise a fallback of the form `b<N>` where `N` is
    the bond index.

    :param bond: Bond.
    :type bond: rdkit.Chem.Bond

    :return: Bond ID.
    :rtype: str
    '''
    # No explicit ID stored on the bond: derive one from its index.
    # NOTE(review): the index is used as-is here, whereas `get_atom_id()`
    # adds one to the atom index -- confirm the asymmetry is intended.
    if not bond.HasProp(CML_ID_PROP):
        return f'b{bond.GetIdx()}'

    return bond.GetProp(CML_ID_PROP)
#------------------------------------------------------------------------------#
def get_sgroups(mol):
    '''
    Returns the list of dictionaries that represent "S-groups" from CML
    input. The CML reader stores this data in JSON format as a
    molecule-level property (CML_SGROUPS_PROP); it is decoded here.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :return: List of dictionaries that contain "S-groups" as captured
        from CML input (XML attributes stored as key/value pairs). Empty
        list if looking up the property raises `KeyError`.
    :rtype: list(dict(str, str))
    '''
    try:
        serialized = mol.GetProp(CML_SGROUPS_PROP)
    except KeyError:
        # Property lookup failed: report "no S-groups".
        return []

    return json.loads(serialized)
#------------------------------------------------------------------------------#
def set_sgroups(mol, sgroups):
    '''
    Stores `sgroups` on `mol`: the list of dictionaries is serialized as
    JSON and the resulting string is set as a molecule-level property
    (CML_SGROUPS_PROP).

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :param sgroups: List of dictionaries that are meant to represent
        "S-groups".
    :type sgroups: list(dict(str, str))
    '''
    serialized = json.dumps(sgroups)
    mol.SetProp(CML_SGROUPS_PROP, serialized)
#------------------------------------------------------------------------------#
def product_of_ranges(extents):
    '''
    Returns an iterator over the Cartesian product of the ranges
    `range(n)`, one for each `n` in `extents`.

    :param extents: Iterable over the range extents. For example, if
        extents is (3, 8), iterator domain is going to be [0, 3) x [0, 8).
    :type extents: iterable over positive int

    :return: Iterator over tuples of integers.
    '''
    # One `range` per extent, fed to `product` as separate arguments.
    return itertools.product(*map(range, extents))
#------------------------------------------------------------------------------#
class EnumerableMixin:
    '''
    Methods common to several "enumerables".

    The methods below call `self.getExtents()` and
    `self.getRealization()`, which are not defined by this mixin and so
    must be supplied by the class it is mixed into.
    '''

    def getIter(self):
        '''
        Returns an iterator over realizations: `getRealization()` is
        applied to every index tuple from the Cartesian product of the
        ranges given by `getExtents()`.

        :return: Iterator over `getRealization()` returns.
        :rtype: iterator
        '''
        index_tuples = product_of_ranges(extents=self.getExtents())
        return map(self.getRealization, index_tuples)

    def getRandomRealization(self, prng):
        '''
        Returns a random realization: for each extent `e` from
        `getExtents()` an index is drawn via `prng.randint(0, e)`, and the
        list of drawn indices is passed to `getRealization()`.

        :param prng: mt19937 pseudorandom number generator from numpy.
        :type prng: `numpy.random.RandomState`
        '''
        picks = [prng.randint(0, extent) for extent in self.getExtents()]
        return self.getRealization(picks)
#------------------------------------------------------------------------------#