Source code for schrodinger.application.dendrogram.core_class

"""
decompose.py: Contains classes to fragment molecules into cores,
linkers and side chains.

Version v0.

Copyright Schrodinger, LLC. All rights reserved.
"""

import os.path
import sys
from collections import namedtuple
from copy import deepcopy
from pprint import pformat

from schrodinger import structure
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import smiles
from schrodinger.utils import fileutils

# Module-leval objects & definitions:
smiles_gen_stereo = smiles.SmilesGenerator(stereo=smiles.STEREO_FROM_GEOMETRY)
smiles_gen_nostereo = smiles.SmilesGenerator(stereo=smiles.NO_STEREO)
# Ring system informaiton.
#  rep = "representation", usually SMARTS or SMILES-like, but not necessarily an actual SMARTS or SMILES
#  name = Either the representaion or a slightly more user-readable example of it
RingSysInfo = namedtuple(
    'RingSysInfo', 'rep name capped_smi uncapped_smi capped_st uncapped_st')


[docs]class Utilities(object): """ Class containing methods used by multiple other classes. """
[docs] @staticmethod def getRingAtomToRings(ring_atoms, rings): """ Create a dictionary that associates each ring atom index with a set of indices into the rings arrays: """ ring_atom_to_rings = {} for iatom in ring_atoms: ring_atom_to_rings[iatom] = set() for iring, ring in enumerate(rings): if iatom in ring: ring_atom_to_rings[iatom].add(iring) return ring_atom_to_rings
[docs] @staticmethod def getExtendedRingAtomToRings(st, ring_atom_to_rings): """ Create a dictionary that includes ring_atom_to_rings and adds off-ring atoms multiple-bonded to any ring atoms. An added off-ring atoms points to the appropriate index into rings, but is not added to the set of ring atoms. """ # Set extended_ring_atom_to_rint to current value of ring_atom_to_rings: extended_ring_atom_to_rings = deepcopy(ring_atom_to_rings) ring_atoms = list(ring_atom_to_rings) # Ring atoms for iatom in ring_atoms: for iring in extended_ring_atom_to_rings[ iatom]: # Set of atoms in one of iatom's rings atom1 = st.atom[iatom] # atom1 is a ring atom for bond in atom1.bond: if bond.order < 2: continue atom2 = bond.atom2 # atom2 is multple-bonded to atom1 iatom2 = atom2.index if iatom2 not in ring_atoms: # atom2 is not a ring atom extended_ring_atom_to_rings[iatom2] = set([iring]) return extended_ring_atom_to_rings
[docs] @staticmethod def molHasRing(mol): """ Maybe not the most efficient way to do this. Find rings in the new molecule. If there are none, it is not a ring fragment. """ if mol.ring is None or len(mol.ring) == 0: return False else: return True
[docs] @staticmethod def adjustName(name): """ Eliminate leading and trailing blanks; replace newlines with '; ', then replace remaining internal white space with single blanks. """ name = name.strip() name = name.replace('\n', '; ') ar = name.split() name = ' '.join(ar) return name
[docs] @staticmethod def capToStar(smi, smiles_gen): """ Do string replacement in a SMILES, replacing all He with the '*' wildcard. Canonicalize using the smiles_gen method provided. """ cap = r'[He]' smi = smi.replace(cap, r'[*]') smi = smiles_gen.canonicalize(smi) return smi
[docs] @staticmethod def stripBrackets(string): """ Strip brackets from a string -- typically SMARTS or SMILES -- to create a more readable version for use as a user-visible name. """ string = string.replace(r'[', '') string = string.replace(r']', '') return string
[docs] @staticmethod def getRingDataStructures(st): """ Create and return several commonly used data structures concerning ring systems. """ # List of atom sets for rings in ring system rings = [] # True or False depending on aromaticity of each atom set ring_is_aromatic = [] for ring in st.ring: rings.append(set(ring.getAtomIndices())) ring_is_aromatic.append(ring.isAromatic()) # Create a list of ring atoms: ring_atoms = set().union(*rings) return rings, ring_atoms, ring_is_aromatic
[docs] @staticmethod def getAromaticRingAtoms(rings, ring_atoms, ring_is_aromatic): """ Return sets of aromatic and nonaromatic ring atoms. """ # Create set of aromatic ring atoms: aromatic_ring_atoms = set() for iring, ring_atom_set in enumerate(rings): if ring_is_aromatic[iring]: aromatic_ring_atoms |= ring_atom_set # Disjunction of two sets: nonaromatic_ring_atoms = ring_atoms - aromatic_ring_atoms return aromatic_ring_atoms, nonaromatic_ring_atoms
[docs] @staticmethod def getRingSysSt(st): """ Given a fragmented molecule, turn the fragment containing the ring system into an st and return that st. Make sure there is only one. """ ring_sys_sts = [] # We keep an array only for the assertion test below for frag in st.molecule: # Extract the fragment into a full-fledged structure: mol = structure._Molecule.extractStructure(frag) # Now determine whether the current fragment has rings: if Utilities.molHasRing(mol): ring_sys_sts.append(mol) assert len(ring_sys_sts) == 1 # Should be only one... return ring_sys_sts[0] # ... so return that one.
[docs]class GetPartitions(object): # Holds the list of partition names for a single ring system: NameList = namedtuple('NameList', 'name0 name1 name2 name3a name3b name4 name5 name6')
[docs] def __init__(self, st, ist, log_f, smi_f): """ Decomposes the structure into ring systems and stores an array NameList objects, one for each ring system found. """ self.name_lists = self._getPartitions(st, ist, log_f, smi_f)
[docs] def getNextFormattedNameArray(self): """ On each call, yields an array suitable for passing to the DB for one ring system found in the input structure. The array contains partition names for the ring system for all partitionings. """ for name_list in self.name_lists: ar = self.getFormattedNames(name_list) yield ar
[docs] def getFormattedNames(self, name_list): ar = [[name_list.name0], [name_list.name1], [name_list.name2], [name_list.name3a, name_list.name3b], [name_list.name4], [name_list.name5], [name_list.name6]] ar.reverse( ) # We reverse the order of levels because in the DB, Root is 0 return ar
def _getPartitions(self, st, ist, log_f, smi_f): """ Computes partitions for all sing systems in this structure and returns an array of NameList objects, one for each such ring system. If there are no ring systems in the structure, returns an empty list. """ name_lists = [] parent_smi = smiles_gen_stereo.getSmiles(st) parent_name = st.title parent_name = Utilities.adjustName(parent_name) if not parent_name: parent_name = str(ist) parent_action = RingSysStorage.addParent(parent_smi, parent_name) smi_f.write('%s Parent, ist= %d\n' % (parent_smi, ist)) deepest0 = Deepest0( st ) # This call destroys the parent st and creates any ring systems therein log_f.write( 'St %d, parent_name= "%s", parent nring_sys= %d, action= %s\n' % (ist, parent_name, deepest0.nring_sys, parent_action)) # Receives a tuple consisting of a capped and an uncapped core SMILES. The capped one # is for storage as a db Item; the uncapped (actually, H-capped) one can be used to compute # properties like MW. # # The sequence of creating representations is: 0, 1, 3a, 2, 4, 3b, 5, 6, where 0 is "deepest0", the # individual ring system. for ring_sys_info in deepest0.getRingSysInfo(): capped_smi0 = ring_sys_info.capped_smi rep0 = ring_sys_info.rep name0 = ring_sys_info.name action = RingSysStorage.addRingSys(capped_smi0, parent_smi) action = 'ADDED_RING_SYS' log_f.write( ' total nring_sys= %d, nparent= %d, ring_sys= %s, parent= %s, action= %s\n' % (RingSysStorage.nring_sys, RingSysStorage.nparent, capped_smi0, parent_smi, action)) if action == 'ADDED_RING_SYS': output_str = '%s Deepest0 rep, name= %s\n' % (rep0, name0) smi_f.write(output_str) log_f.write(' ' + output_str) ring_sys_info1 = Deepest1(ring_sys_info).getRingSysInfo() rep1 = ring_sys_info1.rep name1 = ring_sys_info1.name output_str = '%s Deepest1 rep, name= %s\n' % (rep1, name1) smi_f.write(output_str) log_f.write(' ' + output_str) ring_sys_info3a = Deepest3a(ring_sys_info1).getRingSysInfo() rep3a = ring_sys_info3a.rep name3a = ring_sys_info3a.name output_str = '%s Deepest3a rep, name= %s\n' % (rep3a, name3a) smi_f.write(output_str) log_f.write(' ' + output_str) ring_sys_info2 = Deepest2(ring_sys_info3a).getRingSysInfo() rep2 = ring_sys_info2.rep name2 = ring_sys_info2.name output_str = '%s Deepest2 rep= %s, name= %s\n' % ('FOO', rep2, name2) smi_f.write(output_str) log_f.write(' ' + output_str) ring_sys_info4 = Deepest4(ring_sys_info3a).getRingSysInfo() rep4 = ring_sys_info4.rep name4 = ring_sys_info4.name output_str = '%s Deepest4 rep, name= %s\n' % (rep4, name4) smi_f.write(output_str) log_f.write(' ' + output_str) ring_sys_info3b = Deepest3b(ring_sys_info4).getRingSysInfo() rep3b = ring_sys_info3b.rep name3b = ring_sys_info3b.name output_str = '%s Deepest3b rep= %s name= %s\n' % ( '[B][Ar]', rep3b, name3b) smi_f.write(output_str) log_f.write(' ' + output_str) ring_sys_info5 = Deepest5(ring_sys_info3b).getRingSysInfo() rep5 = ring_sys_info5.rep name5 = ring_sys_info5.name output_str = '%s Deepest5 rep= %s name= %s\n' % ('[B][As]', rep5, name5) smi_f.write(output_str) log_f.write(' ' + output_str) ring_sys_info6 = Deepest6(ring_sys_info5).getRingSysInfo() rep6 = ring_sys_info6.rep name6 = ring_sys_info6.name output_str = '%s Deepest6 rep= %s name= %s\n' % ('[B][I][P]', rep6, name6) smi_f.write(output_str) log_f.write(' ' + output_str) name_list = self.NameList(rep0, rep1, rep2, rep3a, rep3b, rep4, rep5, rep6) formatted_names = self.getFormattedNames(name_list) pretty_formatted_names_ar = pformat(formatted_names).split('\n') log_f.write(' %s\n' % 'DB-formatted names=') for line in pretty_formatted_names_ar: log_f.write(' %s\n' % line) name_lists.append(name_list) return name_lists
[docs]class RingSysStorage(object): """ Maintain storage for lookup of ring systems and parent structures by SMILES. Each ring system SMILES is associated with a set of parent SMILES in a dictionary. Each parent SMILES is associated with a set of names of input structures containing the parent, because duplicate entries, possibly with different names, can occur. The purpose of all this is to make sure we we don't add the same ring systems multiple times when they occur in multiple parents. """ # Return codes; the maximum value of actions performed is returned. ADDED_NOTHING = 0 # st, parent, and parent_name was already present ADDED_PARENT_NAME = 1 # Added new parent name to existing parent ADDED_PARENT = 2 # Added new parent and parent name to existing ring_sys ADDED_RING_SYS = 3 # Added new ring_sys, parent, and parent name action_names = ( 'ADDED_NOTHING', 'ADDED_PARENT_NAME', 'ADDED_PARENT', 'ADDED_RING_SYS', ) # The storage: ring_sys_d = {} # dict: ring_sys smi -> set of parent smi parent_d = {} # dict: parent smi -> set of parent names # These are syntactic sugar and are updated whenever anything is done. Use of # a property would be more in the spirit of DRY: nring_sys = 0 nparent = 0
[docs] @classmethod def addParent(cls, parent_smi, parent_name): """ Update the ring_system dictionary and the parent dictionary and return an ADDED code describing the "biggest" thing we have done. """ # A little aliasing for convenience: parent_d = cls.parent_d actions = set([cls.ADDED_NOTHING]) # Update update the parent dictionary: if parent_smi in parent_d: if parent_name not in parent_d[parent_smi]: # Additional name for previously seen parent: parent_d[parent_smi].add(parent_name) actions.add(cls.ADDED_PARENT_NAME) else: # Newly seen parent: parent_d[parent_smi] = set([parent_name]) actions.add(cls.ADDED_PARENT) # Update convenience variable: cls.nparent = len(parent_d) # Return the "biggest" action we performed: action = max(actions) action_name = cls.action_names[action] return action_name
[docs] @classmethod def addRingSys(cls, smi, parent_smi): """ Update the ring_system dictionary and the parent dictionary and return an ADDED code describing the "biggest" thing we have done. """ # A little aliasing for convenience: ring_sys_d = cls.ring_sys_d actions = set([cls.ADDED_NOTHING]) # Update the ring_sys dictionary: if smi in ring_sys_d: # We've seen this ring_sys before: if parent_smi not in ring_sys_d[smi]: # ... but not from this parent, so add the parent # SMILES to the ring's set of parents: ring_sys_d[smi].add(parent_smi) actions.add(cls.ADDED_PARENT) else: # This is a new ring_sys, so add it and point to the current parent: ring_sys_d[smi] = set([parent_smi]) actions.add(cls.ADDED_RING_SYS) # Update convenience variable: cls.nring_sys = len(ring_sys_d) # Return the "biggest" action we performed: action = max(actions) action_name = cls.action_names[action] return action_name
[docs]class Deepest0(object): """ Extracts ring systems at the deepest level from a structure. Starts by setting up method to yield deepest-level ring systems in st. Subsequent method calls can request the ring systems and build representations at higher hierarchical levels. Internally, the deepest level is level 0, since this is where we start building. """
[docs] def __init__(self, st): """ Precompute and store ring systems from the specified structure so the user can access them later using getRingSys(). self.ring_sys_info_ar is also created for more detailed introspection. """ # In each ring_sys_info tuple: # capped_st: a ring system in the input st with He replacing the sidechains # uncapped_st: the capped_st with H replacing the sidechains # capped_smi: the stereo SMILES of the capped_st # uncapped_smi: the stereo SMILES of the uncapped_st # rep: the capped SMILES with He replace by '*' # name: the rep with brackets removed self.ring_sys_info_ar = self._deepest0(st) # TODO: It would be safer to make this a property: self.nring_sys = len(self.ring_sys_info_ar)
[docs] def getRingSysInfo(self): """ Each call returns a tuple containing the pre-computed SMILES of the uncapped and capped (respectively) ring system. We return the uncapped one because we may want to use it to compute physical properties, like MW. """ for ring_sys_info in self.ring_sys_info_ar: yield ring_sys_info
def _deepest0(self, st): """ Required actions: for each ring system in the structure (which is a parent structure): - Consider double-bonded off-ring atoms to be part of the ring system - Break bonds not within the ring system - Cap and store each ring system. """ rings, ring_atoms, ring_is_aromatic = Utilities.getRingDataStructures( st) # Create a dictionary of ring atom indices to lists, each list containing # the indices of the rings in the rings list to which the atom in question # belongs: ring_atom_to_rings = Utilities.getRingAtomToRings(ring_atoms, rings) # Create a dictionary including ring_atom_to_rings, but adding to it # off-ring atoms multiple-bonded to aromatic ring atoms: extended_ring_atom_to_rings = Utilities.getExtendedRingAtomToRings( st, ring_atom_to_rings) # Break bonds between ring atoms and other atoms to fragment the # molecule: self._breakBonds(st, extended_ring_atom_to_rings) # Cap and store the ring systems: ring_sys_info_ar = self._getCappedRingSys(st) return ring_sys_info_ar def _breakBonds(self, st, extended_ring_atom_to_rings): """ Break bonds to rings to form fragments. """ ring_atoms = list(extended_ring_atom_to_rings) atom_pairs_to_break = [] for iatom in ring_atoms: atom1 = st.atom[iatom] for bond in atom1.bond: atom2 = bond.atom2 if self._isEligibleForBreaking(atom1, atom2, extended_ring_atom_to_rings): atom_pairs_to_break.append((atom1, atom2)) # NB we cannot break the bond at this point, because # that updates the bond list, which destroys the iteration. # Instead, we store the atom pairs whose bond is to be broken # and do the breaking later. for atom1, atom2 in atom_pairs_to_break: try: st.deleteBond(atom1, atom2) except Exception: # Happens when (x,y) and (y,x) are both in the list, as in biphenyl continue def _isEligibleForBreaking(self, atom1, atom2, extended_ring_atom_to_rings): """ Return True or False, dictating whether the bond between atom1 and atom2 is to be broken or not. Atom1 is always a ring atom or an atom multiple-bonded to an aromatic ring atom. N.B.: atom1 and atom2 are Schrodinger pythonic atoms, not atom indices (integers). """ ring_atoms = list(extended_ring_atom_to_rings) # atom indices is_eligible = None iatom = atom1.index jatom = atom2.index if atom2.atomic_number == 1: is_eligible = False elif jatom not in ring_atoms: # Off-ring atom is_eligible = True else: # Ring-ring bonds are broken only if there is no ring the two # atoms share. So biphenyl will be broken into two benzenes, but # naphthalene will not be broken up: iring = extended_ring_atom_to_rings[iatom] jring = extended_ring_atom_to_rings[jatom] if iring & jring: # The two sets have a ring in common is_eligible = False else: is_eligible = True assert is_eligible is not None return is_eligible def _getCappedRingSys(self, st): """ Cap and classify each fragment, then return a list of tuples, each tuple consisting of (capped_smiles, uncapped_smiles) """ ring_sys_info_ar = [ ] # list of RingSysInfo tuples for ring systems in this st for capped_st, uncapped_st in self._capFragments(st): capped_smi = smiles_gen_stereo.getSmiles(capped_st) rep = Utilities.capToStar(capped_smi, smiles_gen_stereo) name = Utilities.stripBrackets(rep) uncapped_smi = smiles_gen_stereo.getSmiles(uncapped_st) ring_sys_info = RingSysInfo(rep, name, capped_smi, uncapped_smi, capped_st, uncapped_st) ring_sys_info_ar.append(ring_sys_info) return ring_sys_info_ar def _capFragments(self, st): """ Cap and classify the fragments created by bond breaking, returning each fragment in turn using a Python yield: """ for frag in st.molecule: # Extract the fragment into a full-fledged structure: mol = structure._Molecule.extractStructure(frag) len_frag = len(mol.atom) # Add hydrogens and track how many were added (nh_added): build.add_hydrogens(mol) nh_added = len(mol.atom) - len_frag capped_mol = mol.copy() # Replace the just-added H's with He: start = len_frag + 1 stop = start + nh_added for iatom in range(start, stop): atom = capped_mol.atom[iatom] atom.atomic_number = 2 # Transmutation to He # Now determine whether current fragment has rings: if Utilities.molHasRing( capped_mol): # Determine whether it's a ring yield (capped_mol, mol)
[docs]class Deepest1(object): """ Given the Deepest0 information about a ring system, compute the corresponding Deepest1 representation """
[docs] def __init__(self, ring_sys_info0): rep0, name0, capped_smi0, uncapped_smi0, capped_st0, uncapped_st0 = ring_sys_info0 st = capped_st0 # In the ring_sys_info tuple: # capped_st: the st with He for side chains & multiple-bonded atoms on Ar rings # Note that multiple-bonded atoms to non-ar rings were removed, giving gem side-chain pairs # uncapped_st: the capped_st but with H instead of He # capped_smi: the SMILES of the capped_st # uncapped_smi: the SMILES of the uncapped_st # rep: the capped SMILES with He replaced by '*' # name: the rep with brackets removed self.ring_sys_info = self._deepest1(st)
[docs] def getRingSysInfo(self): return self.ring_sys_info
def _deepest1(self, st): """ Return a ring_sys_info object for classification deepest1, given the capped structure for deepest0. Required actions: - Remove multiple-bonded attachments to non-aromatic rings & cap - Remove non-ring atoms attached to multiple-bonded atoms attached to aromatic rings (like H in ArRing=NH) & cap - Change atom type of multiple-bonded attachments to aromatic rings to the cap type - Convert multiple (double) bonds in non-aromatic rings to single bonds - Canonicalize SMILES as non-stereo """ rings, ring_atoms, ring_is_aromatic = Utilities.getRingDataStructures( st) aromatic_ring_atoms, nonaromatic_ring_atoms = Utilities.getAromaticRingAtoms( rings, ring_atoms, ring_is_aromatic) # Create a dictionary of ring atom indices to lists, each list containing # the indices of the rings in the rings list to which the atom in question # belongs: ring_atom_to_rings = Utilities.getRingAtomToRings(ring_atoms, rings) # Create a dictonanary including ring_atom_to_rings, but adding to it # off-ring atoms multiple-bonded to aromatic ring atoms: extended_ring_atom_to_rings = Utilities.getExtendedRingAtomToRings( st, ring_atom_to_rings) atoms_multiple_bonded_to_rings = set( list(extended_ring_atom_to_rings)) - set(list(ring_atom_to_rings)) # Make all bonds between non-aromatic ring atoms single-order: self._reduce_nonaromatic_bond_orders(st, nonaromatic_ring_atoms) # Break bonds between to fragment the molecule: self._breakBonds(st, extended_ring_atom_to_rings, ring_is_aromatic, atoms_multiple_bonded_to_rings) # Mutate atoms (formerly) bonded to rings to the cap type: for iatom in atoms_multiple_bonded_to_rings: atom = st.atom[iatom] # pythonic atom atom.atomic_number = 2 ring_sys_info = self._getCappedRingSys(st) return ring_sys_info def _reduce_nonaromatic_bond_orders(self, st, nonaromatic_ring_atoms): """ Make all bonds between non-aromatic ring atoms single-order. """ # Find nonaromatic-nonaromatic multple bonds and reduce them to single-order: for iatom in nonaromatic_ring_atoms: atom1 = st.atom[iatom] for bond in atom1.bond: if bond.order > 1: atom2 = bond.atom2 iatom2 = atom2.index if iatom2 in nonaromatic_ring_atoms: bond.order = 1 # We add hydrogens here so that we don't end up with caps later. # Since these are added to the end of the st and nothing has been deleted, this shouldn't # render obsolete the membership of heavy-atom arrays, like extended_ring_atom_to_rings: build.add_hydrogens(st) def _breakBonds(self, st, extended_ring_atom_to_rings, ring_is_aromatic, atoms_multiple_bonded_to_rings): """ Break bonds to: - Multiple-bonded attachments to non-aromatic rings - Non-ring atoms attached to multiple-bonded atoms attached to aromatic rings """ atom_pairs_to_break = [] for iatom in atoms_multiple_bonded_to_rings: iring = list(extended_ring_atom_to_rings[iatom])[ 0] # Get the only element of the set of irings atom1 = st.atom[iatom] for bond in atom1.bond: atom2 = bond.atom2 iatom2 = atom2.index if (iatom2 in extended_ring_atom_to_rings) and ( ring_is_aromatic[iring]): pass else: # Either atom2 is an off-ring appendage or else it is in a non-aromatic ring atom_pairs_to_break.append((atom1, atom2)) for atom1, atom2 in atom_pairs_to_break: st.deleteBond(atom1, atom2) def _isEligibleForBreaking(self, atom1, atom2, extended_ring_atom_to_rings, atoms_multiple_bonded_to_rings): """ Return True or False, dictating whether the bond between atom1 and atom2 is to be broken or not. Atom1 is always a ring atom or an atom multiple-bonded to an aromatic ring atom. N.B.: atom1 and atom2 are Schrodinger pythonic atoms, not atom indices (integers). """ ring_atoms = list(extended_ring_atom_to_rings) is_eligible = None iatom = atom1.index jatom = atom2.index if atom2.atomic_number == 1: is_eligible = False elif jatom not in ring_atoms: # Off-ring atom is_eligible = True else: # Ring-ring bonds are broken only if there is no ring the two # atoms share. So biphenyl will be broken into two benzenes, but # naphthalene will not be broken up: iring = extended_ring_atom_to_rings[iatom] jring = extended_ring_atom_to_rings[jatom] if iring & jring: # The two sets have a ring in common is_eligible = False else: is_eligible = True assert is_eligible is not None return is_eligible def _getCappedRingSys(self, st): """ Cap and classify each fragment, then return a list of tuples, each tuple consisting of (capped_smiles, uncapped_smiles) """ capped_st, uncapped_st = self._capFragments(st) capped_smi = smiles_gen_nostereo.getSmiles(capped_st) rep = Utilities.capToStar(capped_smi, smiles_gen_nostereo) name = Utilities.stripBrackets(rep) uncapped_smi = smiles_gen_nostereo.getSmiles(uncapped_st) ring_sys_info = RingSysInfo(rep, name, capped_smi, uncapped_smi, capped_st, uncapped_st) return ring_sys_info def _capFragments(self, st): """ Cap and classify the fragments created by bond breaking, returning each fragment in turn using a Python yield: """ for frag in st.molecule: # Extract the fragment into a full-fledged structure: mol = structure._Molecule.extractStructure(frag) len_frag = len(mol.atom) # Add hydrogens and track how many were added (nh_added): build.add_hydrogens(mol) nh_added = len(mol.atom) - len_frag capped_mol = mol.copy() # Replace the just-added H's with He: start = len_frag + 1 stop = start + nh_added for iatom in range(start, stop): atom = capped_mol.atom[iatom] atom.atomic_number = 2 # Transmutation to He # Now determine whether current fragment has rings: if Utilities.molHasRing(capped_mol): return (capped_mol, mol)
[docs]class Deepest3a(object): """ Given the Deepest1 information about a ring system, compute the corresponding Deepest3a representation. This is an all-star representation of the ring topology, including ring size, without off-ring atoms and without reference to aromaticity. """
[docs] def __init__(self, ring_sys_info0): rep1, name1, capped_smi1, uncapped_smi1, capped_st1, uncapped_st1 = ring_sys_info0 st = capped_st1 rep3a, name3a, capped_smi3a, capped_st3a = self._deepest3a(st) # In the ring_sys_info tuple: # capped_st: the capped_st from Deepest1 # uncapped_st: the uncapped_st from Deepest1 # capped_smi: the smi of the bare ring system with He for every atom # uncapped_smi: same os the uncapped)smi # rep: the capped SMILES with He replaced by '*' # name: the rep with brackets removed self.ring_sys_info = RingSysInfo(rep3a, name3a, capped_smi3a, uncapped_smi1, capped_st3a, uncapped_st1)
[docs] def getRingSysInfo(self): return self.ring_sys_info
def _deepest3a(self, st): """ Return a ring_sys_info object for classification deepest3a, given the capped structure for deepest1. Required actions: - Starting with uncapped_st1, do the following non-destructively: - Break bonds to all off-ring atoms, including H, but don't discard them - Change all atom types to He - Convert all bonds to single - Note that all atom numbers are the same as the uncapped_st1, and correpond to them - Find the ring-containing fragment and obtain a SMILES for it - Change all atom types in the SMILES to ``*`` and canonicalize - In the resulting RingSysInfo: - rep, name, capped_smi and capped_st will be the new Deepest3a values - other members will retain their Deepest1 values """ rings, ring_atoms, ring_is_aromatic = Utilities.getRingDataStructures( st) # Create a set of all atoms; these are atom indices: all_atoms = set([atom.index for atom in st.atom]) # Create a list of non-ring atoms; these are atom indices: nonring_atoms = list(all_atoms - ring_atoms) # Change all bond orders to single: for bond in st.bond: if bond.order > 1: bond.order = 1 # Change all atom types to He, and neutralize them: for atom in st.atom: atom.formal_charge = 0 atom.atomic_number = 2 # Break bonds from ring-atoms to non-ring atoms. # Note that st still contains all atoms, but not all bonds. self._breakBonds(st, nonring_atoms) # Return the molecule containing the ring system: ring_sys_st = Utilities.getRingSysSt(st) # Get the smiles of the ring-system containing fragment: capped_smi = smiles_gen_nostereo.getSmiles(ring_sys_st) # Change all the He to *, and canonicalize: rep = Utilities.capToStar(capped_smi, smiles_gen_nostereo) # Strip brackets to get a more human-readable name: name = Utilities.stripBrackets(rep) return rep, name, capped_smi, st def _breakBonds(self, st, nonring_atoms): """ Break bonds between non-ring atoms and ring atoms. """ atom_pairs_to_break = [] for iatom in nonring_atoms: atom1 = st.atom[iatom] for bond in atom1.bond: atom2 = bond.atom2 iatom2 = atom2.index if ( iatom2 not in nonring_atoms ): # Actually, each nonring atom should have only one bond, and that to a ring atom atom_pairs_to_break.append((atom1, atom2)) for atom1, atom2 in atom_pairs_to_break: st.deleteBond(atom1, atom2)
[docs]class Deepest2(object): """ Given the Deepest3a information about a ring system, compute the corresponding Deepest2 representation. This is a SMARTS that includes the ring skeleton only, but distinguishes between aromatic and aliphatic atoms. """
[docs] def __init__(self, ring_sys_info): # uncapped_st, though obtained from Deepest3a, was actually passed through from Deepest1: rep3a, name3a, capped_smi3a, uncapped_smi1, capped_st3a, uncapped_st1 = ring_sys_info rep2, name2 = self._deepest2(uncapped_st1, rep3a) # In the ring_sys_info tuple: # capped_st: the capped_st from Deepest1 # uncapped_st: the uncapped_st from Deepest1 # capped_smi: the capped_smi from Deepest3a # uncapped_smi: the uncapped smi from Deepest3a # rep: a SMARTS for the bare ring system, each atom either [a] or [A], depending on aromaticity # name: the rep with brackets removed self.ring_sys_info = RingSysInfo(rep2, name2, capped_smi3a, uncapped_smi1, capped_st3a, uncapped_st1)
[docs] def getRingSysInfo(self): return self.ring_sys_info
def _deepest2(self, st, smi): """ Return a ring_sys_info object for classification deepest2, given the capped structure for deepest1, which was propagated, with other information, from deepest3a. Required actions: - Starting with the rep for Deepest1, which is an all-star SMILES, match it every way aginst the uncapped structure for deepest1. - For each match, create a copy of rep, replacing ``*`` with ``a`` or ``A``, depending upon whether the matched atom is aromatic or aliphatic in the original structure. This gives a SMARTS for each match. Store them in a list. - Sort the list of SMARTS and keep the lowest-sorted one as the new rep. - The new name is the rep with the bracket signs removed. """ rings, ring_atoms, ring_is_aromatic = Utilities.getRingDataStructures( st) aromatic_ring_atoms, nonaromatic_ring_atoms = Utilities.getAromaticRingAtoms( rings, ring_atoms, ring_is_aromatic) matches = analyze.evaluate_smarts_canvas(st, smi, stereo=analyze.NO_STEREO, uniqueFilter=False) smas = [] # Will hold newly created SMARTS patterns for match in matches: # list of atom indices arom_seq = [ ] # 'a' or 'A', depending on whether the atom in the match is aliphatic or aromatic c = None for iatom in match: if iatom in aromatic_ring_atoms: c = 'a' elif iatom in nonaromatic_ring_atoms: c = 'A' else: raise ValueError('iatom %d is apparently not a ring atom' % iatom) arom_seq.append(c) sma_ar = [] for c in smi: # Char in the input SMILES string if c == '*': cc = arom_seq.pop(0) sma_ar.append(cc) else: sma_ar.append(c) sma = ''.join(sma_ar) smas.append(sma) smas.sort rep = smas[0] name = Utilities.stripBrackets(rep) return rep, name
[docs]class Deepest4(object): """ Create an St that has a topology exhibiting minimal ring sizes while still preserving bridgeheads and condensed bonds. Preserve all atoms, with the broken bonds. Create an "all-star" representation for this. """
[docs] def __init__(self, ring_sys_info): # uncapped_st, though obtained from Deepest3a, was actually passed through from Deepest1: rep2, name2, capped_smi3a, uncapped_smi1, capped_st3a, uncapped_st1 = ring_sys_info rep4, name4, capped_smi4 = self._deepest4(capped_st3a) # In the ring_sys_info tuple: # capped_st: rebonded capped_st from Deepest1, bonds to off-ring atoms severed, and ring-system # rebonded to smallest rings consistent with topology. H added then all atom types changed to He # Note that heavy-atom numbering does not change from the input st. # uncapped_st: the uncapped_st from Deepest1 # capped_smi: SMILES of the ring-system fragment in capped_st # uncapped_smi: the uncapped smi from Deepest1 # rep: The capped_smi # name: the rep with brackets removed capped_st4 = capped_st3a # capped_st3 has been modified, so here we just call it capped_st4, to confuse the reader # Verify that we've not accidentally added or removed any atoms: assert len(capped_st4.atom) == len(uncapped_st1.atom) self.ring_sys_info = RingSysInfo(rep4, name4, capped_smi4, uncapped_smi1, capped_st4, uncapped_st1)
[docs] def getRingSysInfo(self): return self.ring_sys_info
def _deepest4(self, st): """ Required actions: - Modify capped_st as follows: - Break bonds from ring atoms to all non-ring atoms - Isolate atoms per the rebonding criteria """ # Carry out the rebonding operation exhaustively on the st: self._rebondRings(st) # Return the molecule containing the ring system: ring_sys_st = Utilities.getRingSysSt(st) # Get the smiles of the ring-system containing fragment: capped_smi = smiles_gen_nostereo.getSmiles(ring_sys_st) # Change all the He to *: rep = Utilities.capToStar(capped_smi, smiles_gen_nostereo) # Strip brackets to get a more human-readable name: name = Utilities.stripBrackets(rep) return rep, name, capped_smi def _rebondRings(self, st): last_len_ring_atoms = 0 while True: rings, ring_atoms, ring_is_aromatic = Utilities.getRingDataStructures( st) ring_atom_to_rings = Utilities.getRingAtomToRings(ring_atoms, rings) len_ring_atoms = len(ring_atoms) if len_ring_atoms == last_len_ring_atoms: # No more rebonding is possible break last_len_ring_atoms = len_ring_atoms for iring, iatom_set in enumerate(rings): # rebonded will be True if atoms were removed: rebonded = self._rebondSomeAtom(st, deepcopy(iatom_set), iring, rings, ring_atom_to_rings) if rebonded: writer = structure.MaestroWriter('problem.mae') writer.append(st) writer.close() break def _rebondSomeAtom(self, st, iatom_set, iring, rings, ring_atom_to_rings): """ Attempt to remove an atom from a ring by breaking its bonds in the ring and bonding its neighbors together, subject to several criteria. Return True upon the first successful attampt or False if there were no successful attempts. iatom_set starts out as a set of atoms constituting some ring. """ # Pare iatom_set down to atoms that are exclusively in the ring (eliminating bridgeheads, condesnse bonds): for jring, jatom_set in enumerate(rings): if jring == iring: continue iatom_set -= jatom_set for iatom in iatom_set: if self._rebondThisAtom(st.atom[iatom], ring_atom_to_rings): return True return False def _rebondThisAtom(self, atom_i, ring_atom_to_rings): # Put the (pythonic) atoms bonded to iatom in an array and make sure there are exactly two: assert len(atom_i.bond) == 2 atom_j, atom_k = [b.atom2 for b in atom_i.bond] should_rebond = self._shouldRebond(atom_j, atom_k, ring_atom_to_rings) if should_rebond: atom_pairs_to_break = [] for bond in atom_i.bond: atom1 = bond.atom1 atom2 = bond.atom2 atom_pairs_to_break.append((atom1, atom2)) for atom1, atom2 in atom_pairs_to_break: atom1.deleteBond(atom2) atom_j.addBond(atom_k, 1) def _shouldRebond(self, atom_j, atom_k, ring_atom_to_rings): """ Inputs are two pythonic ring atoms bonded to some other ring atom. Returns true if that other ring atom should be disconnected from these two neighbors and the neigbors bonded together. Else, returns False. """ # Test for whether the two bonded atoms are in a 3-membered ring: for bond in atom_j.bond: atom2 = bond.atom2 if atom_k == atom2: # The two bonded atoms are part of a a 3-membered ring. # Removing iatom would destroy this ring, so we don't do so. return False # Atom indices in st: jatom = atom_j.index katom = atom_k.index # Sets of ring indices that the bonded atoms belong to: jringset = ring_atom_to_rings[jatom] kringset = ring_atom_to_rings[katom] if len(jringset) > 1 and len(kringset) > 1: if jringset == kringset: return False return True
[docs]class Deepest3b(object): """ Given the Deepest4 information about a ring system, compute the corresponding Deepest3a representation. This is a SMARTS that includes the ring skeleton only, and distinguishes between aromatic and aliphatic atoms, where we are talking about the skeleton of the Deepest4 represntation, where rings have been reduced to their minimal sizes consistent with saving bridgehead adtoms and condensed bonds. """
[docs] def __init__(self, ring_sys_info): rep4, name4, capped_smi4, uncapped_smi1, capped_st4, uncapped_st1 = ring_sys_info rep3b, name3b = self._deepest3b(rep4, capped_st4, uncapped_st1) # In the ring_sys_info tuple: # capped_st: the capped_st from Deepest1 # uncapped_st: the uncapped_st from Deepest4 # capped_smi: the capped_smi from Deepest4 # uncapped_smi: the uncapped smi from Deepest1 # rep: a SMARTS for the bare ring system, each atom either [a] or [A], depending on aromaticity # name: the rep with brackets removed self.ring_sys_info = RingSysInfo(rep3b, name3b, capped_smi4, uncapped_smi1, capped_st4, uncapped_st1)
[docs] def getRingSysInfo(self): return self.ring_sys_info
def _deepest3b(self, smi, capped_st, uncapped_st): """ Return a ring_sys_info object for classification Deepest3b, given the capped structure for deepest4, which was propagated, with other information, from deepest4. Required actions: - Starting with the rep for Deepest4 (smi), which is an all-star SMILES, match it every way aginst the uncapped_st, which was propagated from Deepest4. - For each match, create a copy of rep, replacing ``*`` with ``a`` or ``A``, depending upon whether the matched atom number is aromatic or aliphatic in the uncapped_smi, which was propagated from Deepest1 and which retains its aromaticity information - This gives a SMARTS for each match. Store the SMARTS in a list. - Sort the list of SMARTS and keep the lowest-sorted one as the new rep. - The new name is the rep with the bracket signs removed. """ rings, ring_atoms, ring_is_aromatic = Utilities.getRingDataStructures( uncapped_st) aromatic_ring_atoms, nonaromatic_ring_atoms = Utilities.getAromaticRingAtoms( rings, ring_atoms, ring_is_aromatic) matches = analyze.evaluate_smarts_canvas(capped_st, smi, stereo=analyze.NO_STEREO, uniqueFilter=False) smas = [] # Will hold newly created SMARTS patterns for match in matches: # list of atom indices arom_seq = [ ] # 'a' or 'A', depending on whether the atom in the match is aliphatic or aromatic c = None for iatom in match: if iatom in aromatic_ring_atoms: c = 'a' elif iatom in nonaromatic_ring_atoms: c = 'A' else: raise ValueError('iatom %d is apparently not a ring atom' % iatom) arom_seq.append(c) sma_ar = [] for c in smi: # Char in the input SMILES string if c == '*': cc = arom_seq.pop(0) sma_ar.append(cc) else: sma_ar.append(c) sma = ''.join(sma_ar) smas.append(sma) smas.sort rep = smas[0] name = Utilities.stripBrackets(rep) return rep, name
[docs]class Deepest5(object): """ Rep is the sssr class ('1_sssr', '2_sssr', or whatever) of the ring system. """
[docs] def __init__(self, ring_sys_info): rep3b, name3b, capped_smi4, uncapped_smi1, capped_st4, uncapped_st1 = ring_sys_info rep5, name5 = self._deepest5(uncapped_st1) # In the ring_sys_info tuple: # capped_st: the capped_st from Deepest1 # uncapped_st: the uncapped_st from Deepest4 # capped_smi: the capped_smi from Deepest4 # uncapped_smi: the uncapped smi from Deepest1 # rep: a name like '2sssr' # name: same as t he rep self.ring_sys_info = RingSysInfo(rep5, name5, capped_smi4, uncapped_smi1, capped_st4, uncapped_st1)
[docs] def getRingSysInfo(self): return self.ring_sys_info
def _deepest5(self, st): nrings = len(st.ring) rep = '%d sssr' % nrings name = rep return rep, name
[docs]class Deepest6(object): """ Rep is "All_structures" """
[docs] def __init__(self, ring_sys_info): rep5, name5, capped_smi4, uncapped_smi1, capped_st4, uncapped_st1 = ring_sys_info rep6, name6 = self._deepest6() # In the ring_sys_info tuple: # capped_st: the capped_st from Deepest4 # uncapped_st: the uncapped_st from Deepest1 # capped_smi: the capped_smi from Deepest4 # uncapped_smi: the uncapped smi from Deepest1 # rep: always "All_structures" # name: same as the rep self.ring_sys_info = RingSysInfo(rep6, name6, capped_smi4, uncapped_smi1, capped_st4, uncapped_st1)
[docs] def getRingSysInfo(self): return self.ring_sys_info
def _deepest6(self): rep = 'All ring systems' name = rep return rep, name
[docs]def get_next_partition_array( reader, log_f=open(os.devnull, 'wb'), # noqa: M511 smi_f=open(os.devnull, 'wb')): # noqa: M511 for ist, st in enumerate(reader, start=1): get_partitions = GetPartitions(st, ist, log_f, smi_f) for partition_ar in get_partitions.getNextFormattedNameArray(): yield partition_ar
[docs]def main(): """ Requires a single cmdline arg: a structure-file name. Writes output .smi file using the file-name prefix of the input file, prepended to -cores.smi. So foo.sdf creates foo-cores.sdf; also foo-partitions.sdf and foo.log The .partitions output contains the bare results, which is probably what you want to read into another program. """ sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1) # line-buffer sys.stderr = sys.stdout # Check that cmdline is kosher (halal, if you prefer): if len(sys.argv) != 2: print('decompose_molecules.py takes a single cmdline argument,' + ' an input-structure-file name.') sys.exit(1) input_fname = sys.argv[1] # Make sure input file is really a structure file: fmt = fileutils.get_structure_file_format(input_fname) if fmt is None: print( 'The supplied file name, "%s", is not a valid structure-file name.' % (input_fname,)) sys.exit(1) # Create names of output files: basename = fileutils.get_basename(input_fname) smi_fname = basename + '-cores.smi' # For import to Maestro, to extent possible smi_f = open(smi_fname, 'wb') partition_fname = basename + '.partitions' # Array of array of class names for import to DB partition_f = open(partition_fname, 'wb') image_fname = basename + '-image.smi' # SMILES for production of images image_f = open(image_fname, 'wb') image_set = set() reader = structure.StructureReader(input_fname) for array in get_next_partition_array(reader, sys.stdout, smi_f): partition_f.write('%s\n' % array) print('array=', array) for ar in array[2:]: # Skip levels 0 and 1 print('ar=', ar) for smi in ar: if smi in image_set: continue image_set.add(smi) image_f.write('%s\n' % smi)
if __name__ == '__main__': main()