Source code for schrodinger.rdkit.coarsegrain

"""
Conversions between Coarse-grained Schrodinger structure objects (mmct) and
RDKit mol objects.

Please see schrodinger.rdkit_adapter for structural/philosophic differences
between these two formats
"""

from collections import Counter
from rdkit import Chem

from schrodinger.infra import mm
from schrodinger.application.matsci import msutils
from schrodinger.thirdparty import rdkit_adapter

# Particle names that must not be used as coarse-grained (CG) particle names
# when a structure is converted to RDKit; get_cgparticle_to_element_mapper
# raises RuntimeError if any of them is present.
# NOTE(review): these look like SMARTS atom-primitive letters - confirm.
PROTECTED_PATTERN_BIT = ['D', 'R', 'r', 'v', 'x', 'X', 'H']

# Module-level cache for get_proxy_periodic_table: None until the first call,
# afterwards the list of proxy element symbols.
_PROXY_PERIODIC_TABLE = None


def get_proxy_periodic_table():
    """
    Return the element symbols that can stand in for CG particles.

    The list is built on the first call from the RDKit periodic table, keeping
    the symbols of atomic numbers 1 through 117 whose default valence is
    reported as -1. It is then stored in the module-level cache and reused.

    :rtype: list[str]
    :return: Cached list of element symbols. The same list object is returned
        on every call, so it must not be modified by the caller.
    """
    global _PROXY_PERIODIC_TABLE
    if _PROXY_PERIODIC_TABLE is not None:
        return _PROXY_PERIODIC_TABLE

    table = Chem.rdchem.GetPeriodicTable()
    symbols = []
    for number in range(1, 118):
        if table.GetDefaultValence(number) == -1:
            symbols.append(table.GetElementSymbol(number))

    # Only publish the cache once the full list has been built.
    _PROXY_PERIODIC_TABLE = symbols
    return _PROXY_PERIODIC_TABLE
# Each CG particle name is mapped to an element name. These atoms are used to
# generate the RDKit mol for the CG system.
def get_cgparticle_to_element_mapper(cg_particle_names):
    """
    Create a mapper between coarse-grain particle name and an element.

    Repeated names are collapsed into a single entry, so the names may be
    passed with duplicates (for example one name per particle). Elements are
    assigned in the order in which the distinct names first appear.

    :type cg_particle_names: list
    :param cg_particle_names: list of CG particle names

    :raise RuntimeError: if any name is listed in PROTECTED_PATTERN_BIT, or if
        there are more distinct names than available proxy elements

    :rtype: dict
    :return: dict with CG particle name as key and element name as value
    """
    # Can't use sets here, need to keep the order to be reproducible.
    # dict.fromkeys drops repeats while keeping first-seen order. Filtering on
    # an occurrence count of 1 would instead discard every repeated name and
    # leave it without a mapping.
    unique_names = list(dict.fromkeys(cg_particle_names))
    taken_names = set(unique_names)

    protected_set = taken_names.intersection(PROTECTED_PATTERN_BIT)
    if protected_set:
        raise RuntimeError(
            'Coarse-grained structures containing protected names, such as '
            f'{sorted(protected_set)}, are not supported.')

    # Remove CG bead names from the proxy elements due to possible conflict
    # between the two sets of names. A new list is built, so the cached proxy
    # table itself is left untouched.
    proxy_element = [
        name for name in get_proxy_periodic_table() if name not in taken_names
    ]

    if len(unique_names) > len(proxy_element):
        raise RuntimeError(
            f"Cannot have more than {len(proxy_element)} unique particle names")

    # Create a mapper between schrodinger CG particle name and new element
    # name for rdkit mol
    return dict(zip(unique_names, proxy_element))
def prepare_cg_for_rdkit(struct):
    """
    Build a fake AA structure from a CG structure for conversion to RDKIT mol.

    The work is done on the result of `struct.copy()`: the coarse-grain atom
    property is removed from it and every atom's element is replaced by the
    proxy element assigned to its particle name.

    :param schrodinger.structure.Structure struct: Input CG structure

    :rtype: schrodinger.structure.Structure, dict
    :return: Fake AA structure and internal mapping dict between schrodinger
        particle name and rdkit proxy element name
    """
    fake_aa = struct.copy()
    msutils.remove_atom_property(fake_aa,
                                 prop=mm.MMCT_ATOM_PROPERTY_COARSE_GRAIN)

    # Sorted distinct names give a reproducible name -> element assignment.
    distinct_names = list({atom.name for atom in fake_aa.atom})
    distinct_names.sort()
    name_to_element = get_cgparticle_to_element_mapper(distinct_names)

    for atom in fake_aa.atom:
        atom.element = name_to_element[atom.name]

    return fake_aa, name_to_element
def _coarsegrain_st_to_rdkit(st):
    """
    Create RDKIT mol object from a coarse-grained structure

    :type st: `schrodinger.structure.Structure`
    :param st: structure

    :raise rdkit_adapter.adapter.InconsistentStructureError: if the structure
        is not coarse-grained

    :rtype: `rdkit.Mol`, dict
    :return: rdkit molecule and internal mapping dict between schrodinger
        particle name and rdkit proxy element name
    """
    # an atomistic structure, raise error
    if not msutils.is_coarse_grain(st, by_atom=True):
        # The message names this function so the failure can be traced back
        # to the right place.
        raise rdkit_adapter.adapter.InconsistentStructureError(
            "_coarsegrain_st_to_rdkit only supports coarse-grained "
            "structures. Please see rdkit_adapter.to_rdkit function")

    # From here on `st` is the fake AA copy, not the caller's structure.
    st, proxy_element_mapper = prepare_cg_for_rdkit(st)

    # include_properties and include_coordinates are set to false in
    # adapter.evaluate_smarts, match here. If changing please check the
    # performance (MATSCI-11446)
    mol = rdkit_adapter.to_rdkit(st,
                                 include_properties=False,
                                 include_coordinates=False,
                                 sanitize=False)

    # reset name of rd_mol: each rdkit atom gets the CG particle name of the
    # structure atom it was created from.
    for atom in mol.GetAtoms():
        st_idx = atom.GetIntProp(rdkit_adapter.SDGR_INDEX)
        st_atom = st.atom[st_idx]
        # NOTE(review): this sets a Python-side attribute on the rdkit atom
        # object; whether it is visible after this loop is not evident from
        # this file - confirm.
        atom.name = st_atom.name
        atom.SetProp('smilesSymbol', st_atom.name)
        atom.SetNoImplicit(True)

    mol.UpdatePropertyCache(strict=False)
    return mol, proxy_element_mapper
def get_coarsegrain_smiles(st, atom_ids=None):
    """
    Get the smiles for a coarse-grained structure or a subset of its atoms.

    :type st: `schrodinger.structure.Structure`
    :param st: structure

    :type atom_ids: list
    :param atom_ids: list of substructure atom id. If None or empty, the
        smiles of the whole structure is returned.

    :rtype: str
    :return: smiles for coarse grain substructure
    """
    mol, _ = _coarsegrain_st_to_rdkit(st)

    if atom_ids:
        # Translate structure atom ids into rdkit atom indices.
        index_map = rdkit_adapter.get_map_sdgr_to_rdk(mol)
        fragment_atoms = [index_map[idx] for idx in atom_ids]
        return Chem.MolFragmentToSmiles(mol, fragment_atoms)

    return Chem.MolToSmiles(mol)