Source code for schrodinger.analysis.transformations

"""

Module to support chemical transformations defined via reaction SMARTS
(RDKit dialect).

Transformations can be applied either individually using
apply_transform(), or "en-masse" via apply_transforms() (see below).

TransformsRepository class supports loading of the transformations
from files (or text) in JSON format.

"""

import json
import os
from collections import defaultdict

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdmolops
from voluptuous import All
from voluptuous import Any
from voluptuous import Length
from voluptuous import Required
from voluptuous import Schema

from schrodinger import job
from schrodinger.structutils import analyze
from schrodinger.structutils import rmsd
from schrodinger.rdkit import rdkit_adapter
from schrodinger.utils import log

#------------------------------------------------------------------------------#


[docs]class TransformsRepository(object): system_file = os.path.join(job.util.hunt('mmshare', 'data'), 'transformations.json') validate = Schema([{ 'name': Any(str), Required('smarts'): Any(str), Required('tags'): All([Any(str)], Length(min=1)) }], extra=True)
[docs] def __init__(self): self.entries = []
def _appendEntries(self, entries): for e in TransformsRepository.validate(entries): try: e['tags'] = [e['name']] + e['tags'] except KeyError: pass self.entries.append(e)
[docs] def load(self, fp): self._appendEntries(json.load(fp))
[docs] def loads(self, text): self._appendEntries(json.loads(text))
[docs] def loadFile(self, filename): with open(filename, 'r') as fp: self.load(fp)
@property def tags(self): acc = set() for e in self.entries: for t in e['tags']: acc.add(t) return acc @property def shared_tags(self): """ Tags shared by two or more entries. """ counts = defaultdict(int) for e in self.entries: for t in set(e['tags']): counts[t] += 1 return {k for (k, v) in counts.items() if v > 1}
[docs] def getEntries(self, tags=None): if tags: if type(tags) != set: tags = set(tags) return [e for e in self.entries if tags.intersection(e['tags'])] else: return self.entries
[docs] def getSmarts(self, tags=None): return [e['smarts'] for e in self.getEntries(tags)]
#------------------------------------------------------------------------------# def _extract_heavy_and_polar_atoms(st, core=None, logger=None): ''' Extracts heavy atoms and polar hydrogens, returns (st_noH, core_noH) where st_noH is the substructure of the input st, core_noH is set of the atom indices in st_noH that correspond to the "core" atoms in the input structure. :type st: `Structure` :param st: Input structure. :type core: iterable :param core: Indices of the atoms that must be kept unchanged. ''' if logger is None: logger = log.get_logger(__file__) noH = analyze.evaluate_asl(st, 'not (non_polar_hydrogens)') st_noH = st.extract(noH) to_noH = {j: i for (i, j) in enumerate(noH, start=1)} core_noH = set() if core: for i in core: try: core_noH.add(to_noH[i]) except KeyError: logger.warning('ignoring non-polar core Hydrogen %d', i) return (st_noH, core_noH) #------------------------------------------------------------------------------#
[docs]def apply_transform(st, rxn, core=None, f3d_engine=None, logger=None): ''' Applies single transformation and generates (smiles, st, core) tuples for the outcomes. :type st: `Structure` :param st: Input structure. :type rxn: `ChemicalReaction` instance. :param rxn: Reaction to be performed. :type core: iterable :param core: Indices of the atoms that must be kept unchanged. :type f3d_engine: `fast3d.SingleConformerEngine` instance. :param f3d_engine: fast3d handle to be used for 3d coordinates generation (omitted if None). ''' if logger is None: logger = log.get_logger(__file__) # drop non-polar Hydrogens st_noH, core_noH = _extract_heavy_and_polar_atoms(st, core, logger) try: mol = rdkit_adapter.to_rdkit(st_noH, implicitH=False, include_properties=True) except: logger.error('could not convert structure to RDkit molecule') raise StopIteration() for atom in mol.GetAtoms(): atom.ClearProp('_protected') # use "isotope" to propagate atom indices through "reaction" atom.SetIsotope(atom.GetIdx() + 1) products = rxn.RunReactants((mol,)) seen = set() for p_mol, in products: unchanged = set() # indices in mol for p_atom in p_mol.GetAtoms(): atom_idx = p_atom.GetIsotope() if atom_idx: unchanged.add(atom_idx) p_atom.SetIsotope(0) # core must not be changed if not core_noH.issubset(unchanged): continue # sanitize product molecule, get unique smiles try: rdmolops.SanitizeMol(p_mol) smiles = Chem.MolToSmiles(p_mol, isomericSmiles=True) except: continue if smiles in seen: continue # get Schrodinger CT try: p_st = rdkit_adapter.from_rdkit(p_mol) except: continue # map the core r2p = dict() # 1-based atoms mapping from noH st to p_st for p_atom in p_st.atom: try: r2p[p_atom.property['i_rdkit_react_atom_idx'] + 1] \ = p_atom.index except KeyError: pass p_core = {r2p[i] for i in core_noH} seen.add(smiles) if f3d_engine is None: yield (smiles, p_st, p_core) continue # instantiate 3d coordinates if requested try: f3d_engine.run(p_st, list(p_core)) except RuntimeError as e: logger.warning('3d coordinates generation failed: %s', e) # align core try: core_atoms = list(core_noH) p_core_atoms = [r2p[i] for i in core_atoms] except KeyError: core_atoms = [] p_core_atoms = [] if len(core_atoms) > 2: rmsd.superimpose(st_noH, core_atoms, p_st, p_core_atoms, use_symmetry=False, move_which=rmsd.CT) yield (smiles, p_st, p_core)
#------------------------------------------------------------------------------#
[docs]def apply_transforms(st, transforms, core=None, f3d_engine=None, logger=None): ''' Applies transformations to structure. Generator of the (`Structure`, smiles, core, route) tuples. :type st: `Structure` :param st: Input structure. :type transforms: list of lists of `ChemicalReaction` instances. :param transforms: List of lists of reactions to be performed. :type core: iterable :param core: Indices of the atoms that must be kept unchanged. :type f3d_engine: `fast3d.SingleConformerEngine` instance. :param f3d_engine: fast3d handle to be used for 3d coordinates generation (omitted if None). ''' if logger is None: logger = log.get_logger(__file__) try: smiles = structure_to_rdkit_smiles(st) except ValueError as e: logger.error(str(e)) raise StopIteration() yielded = set([smiles]) def make_closure(reactions, reactants): def fn(): for (r_smiles, r_st, r_core, r_route) in reactants: for rxn in reactions: try: rxn_smarts = rxn.smarts except AttributeError: try: rxn_smarts = AllChem.ReactionToSmarts(rxn) except RuntimeError: rxn_smarts = 'error' for (p_smiles, p_st, p_core) in \ apply_transform(r_st, rxn, r_core, f3d_engine, logger): if p_smiles not in yielded: yield (p_smiles, p_st, p_core, list(r_route) + [rxn_smarts]) yielded.add(p_smiles) if r_smiles not in yielded: yield (r_smiles, r_st, r_core, r_route) yielded.add(r_smiles) return fn outcome = [(smiles, st, core, [])] for stage in transforms: outcome = make_closure(stage, outcome)() return outcome
#------------------------------------------------------------------------------#
[docs]def structure_to_rdkit_smiles(st): noH = analyze.evaluate_asl(st, 'not (non_polar_hydrogens)') try: mol = rdkit_adapter.to_rdkit(st.extract(noH), implicitH=False, include_properties=False) except: raise ValueError('could not convert structure to RDkit molecule') return Chem.MolToSmiles(mol, isomericSmiles=True)
#------------------------------------------------------------------------------#
[docs]def rdkit_reaction_from_smarts(smarts): rxn = AllChem.ReactionFromSmarts(smarts) rxn.smarts = smarts return rxn
#------------------------------------------------------------------------------#