Source code for schrodinger.application.matsci.sculptcomplex

"""
Utilities for sculpt complex.

Copyright Schrodinger, LLC. All rights reserved.
"""

import argparse
import itertools
import random
import re
from collections import namedtuple

import numpy

from schrodinger import structure
from schrodinger.application.matsci import buildcomplex
from schrodinger.application.matsci import etatoggle as tesu
from schrodinger.application.matsci import msutils
from schrodinger.application.matsci.nano import constants
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import rmsd
from schrodinger.structutils import transform

SITE_PROP = 'i_matsci_coordination_site'
ANCHOR_PROP = 'i_matsci_anchor_index'

HYDROGEN = 'H'
FRAG_DICT = constants.Constants().TERMDICT.copy()
FRAG_DICT['hydrogen'] = [HYDROGEN, HYDROGEN]
FRAG_INFO = ', '.join(['"%s"' % x for x in FRAG_DICT.keys()])

SiteAtom = namedtuple('SiteAtom',
                      ['entry_id', 'element', 'entry_index', 'ws_index'])


[docs]class SculptingError(Exception): pass
[docs]class CoordinationInfo(object): """ Holds and yields information about atoms and groups that coordinate at specific sites """
[docs] def __init__(self): """ Create a CoordinationInfo object """ self.sites = []
[docs] def addSite(self, site): """ Add information about the next site. Information should be added in the order of the sites i.e. add information about site 1 first, then site 2... :type site: `SiteAtom` or str :param site: For sites occupied by ligand atoms, pass in the SiteAtom for the ligand atom. For sites occupied by capping groups, pass in the str name of the capping group. """ self.sites.append(site)
[docs] def ligandSites(self): """ A generator over all the sites attached to the ligand :rtype: (int, `SiteAtom`) :return: Iterates over pairs of index number (zero-based) and the SiteAtom object associated with that site """ for index, site in enumerate(self.sites): if isinstance(site, SiteAtom): yield index, site
[docs] def cappedSites(self): """ A generator over all the sites capped by capping groups :rtype: (int, str) :return: Iterates over pairs of index number (zero-based) and the name of the capping group. The name will be a key to the FRAG_DICT dictionary """ for index, site in enumerate(self.sites): if isinstance(site, str): yield index, site
[docs] def allGroupsAreCapping(self): """ Check if all sites are capped (i.e. there are no ligand attachments) :rtype: bool :return: True if all sites are capped """ return not bool(list(self.ligandSites()))
[docs]def validate_coord_flag(coordinators): """ Validate that the -coord flag argument is correct and parse its information :type coordinators: str :param coordinators: The command line argument for the -coord flag :rtype: (`CoordinationInfo`, list[str]) :return: The CoordinationInfo object with the command line information and strings of corresponding VSEPR geometries. The strings will be from the buildcomplex geometry constants (OCTAHEDRAL, etc.) :raise `argparse.ArgumentTypeError`: If something is wrong with the input """ min_slots = min(buildcomplex.SLOTS_TO_GEOMS.keys()) max_slots = max(buildcomplex.SLOTS_TO_GEOMS.keys()) tokens = coordinators.split(',') numtokens = len(tokens) if numtokens < min_slots or numtokens > max_slots: raise argparse.ArgumentTypeError( 'The number of -coord sites must correspond to a supported ' 'VSEPR geometry - at least %d and no more than %d. Got %d instead.' % (min_slots, max_slots, numtokens)) geometries = buildcomplex.SLOTS_TO_GEOMS[numtokens] coordination_info = CoordinationInfo() for token in tokens: try: index = int(token) except ValueError: if token in FRAG_DICT: value = token else: raise argparse.ArgumentTypeError( 'Each WHAT value for the -coord flag must be either an ' 'integer or one of the following values: %s. Got %s instead' '.' % (FRAG_INFO, token)) else: if index < 1: raise argparse.ArgumentTypeError( 'Integer WHAT values for the -coord flag must be greater ' 'than 0. Got %d instead.' % index) value = SiteAtom(entry_id=None, element=None, ws_index=None, entry_index=index) coordination_info.addSite(value) return coordination_info, geometries
[docs]class Sculptor(object): """ Does the work of sculpting a ligand around a metal atom and capping open valences with small capping groups. """
[docs] def __init__(self, element, coordination_info, geometry, del_h, title, enumerating): """ :type element: str :param element: The atomic symbol of the central atom :type coordination_info: `CoordinationInfo` :param coordination_info: The CoordinationInfo that gives the information about each coordination site :type geometry: str :param geometry: One of the VSEPR string constants from schrodinger.application.matsci.buildcomplex (OCTAHEDRAL, etc.) :type del_h: bool :param del_h: Whether to delete H atoms at the ligand coordination sites :type title: str :param title: The base title for structures created by this Sculptor. :type enumerating: bool :param enumerating: True if enumerating multiple ligand structures """ self.element = element self.coordination_info = coordination_info self.geometry = geometry self.del_h = del_h self.title = title self.enumerating = enumerating self.locations = self.fixLocations()
[docs] def sculpt(self, ligstruct, optimize=True): """ Create a sculpted complex from the given ligand structure :type ligstruct: `schrodinger.structure.Structure` :param ligstruct: The structure of the ligand :type optimize: bool :param optimize: whether to perform a standard geometry optimization on the final sculpted complex :raise SculptingError: Too many metal-to-ligand bonds :rtype: `schrodinger.structure.Structure` :return: The structure of the complex """ self.struct = None # Gather data necessary for building the complex if self.coordination_info.allGroupsAreCapping(): self.ligands = [] else: self.ligands = self.getLigandStructure(ligstruct) self.alignLigands() # Create the unminimized ligand structure self.struct = self.createBasicStructure() self.addCappingLigands() self.bondLigandsToMetal() self.addIdealAnchors() self._addCoordinationNoise() # for complexes with eta bound ligands if the central # atom is not a metal, like for example is a carbon, then # the sculpting minimizer below will raise a force field # error so during that step temporarily use a metal has_eta = any([ atom.atomic_number == -2 for atom, site in self.coordinatingAtoms(self.struct) ]) metal_idxs = analyze.evaluate_asl(self.struct, 'metals') use_temp_metal = has_eta and 1 not in metal_idxs if use_temp_metal: self.struct.atom[1].element = 'Ir' # Use the minimizer to force ligands to ideal binding positions try: self.minimizeIdealComplex(ligstruct.title) except ValueError: msg = (f'Error for ligand {ligstruct.title}:\nUnable to determine ' 'Lewis structure so unable to create complex') raise SculptingError(msg) except RuntimeError as err: self._handleMinimizationRuntimeError(err) self.deleteAnchors() # for complexes with eta bound ligands toggle from the input # dummy atom representation prior to minimization if has_eta: try: tesu.toggle_structure(self.struct) except ValueError as err: raise SculptingError(str(err)) if use_temp_metal: self.struct.atom[1].element = self.element # Fully optimize the clean (no dummy atoms, no restraints) complex, # the exception can happen for a central "metal" carbon eta bound to # ethene which results in carbon having a valence of 3 rather than 4 if optimize: try: buildcomplex.minimize_complex(self.struct, forcefield=minimize.OPLS3) except ValueError: pass # Set the title title = self.title if self.enumerating: title += ' ' + ligstruct.title self.struct.title = title self._cleanAtomProperties(self.struct) return self.struct
def _handleMinimizationRuntimeError(self, err): """ Raise a better error when minimization runtime errors occur :param RuntimeError err: The error that was raised :raise SculptingError: An error with an explanatory message """ error = str(err) result = re.search(r'overlapping atoms (\d+) (\d+)', error) if result: atom1 = result.group(1) atom2 = result.group(2) msg = ('Unable to minimize complex geometry due to overlapping ' f'atoms {atom1} and {atom2}') raise SculptingError(msg) else: # This is not the recognized overlapping atoms error raise SculptingError(error) def _cleanAtomProperties(self, struct): """ Remove atom properties used in sculpting from this structure to avoid stale properties :type struct: `schrodinger.structure.Structure` :param struct: The structure to remove atoms properties from """ # See MATSCI-5406 for prop in (SITE_PROP, ANCHOR_PROP): msutils.remove_atom_property(struct, prop) def _addCoordinationNoise(self): # Add a tiny bit of noise to avoid any complete overlap between site # atoms and their target dummy atoms because atoms at the exact same # location cause a minimizer error, even if one is a dummy for atom in self.struct.atom[1].bonded_atoms: atom.xyz = [a + random.uniform(0.001, 0.003) for a in atom.xyz]
[docs] def getLigandStructure(self, struct): """ Fix up struct to prepare it for binding in the complex :type struct: `schrodinger.structure.Structure` :param struct: The raw structure of the ligand :rtype: list :return: The fixed up struct. A list is returned to maintain API compatibitily with code that was implemented for self.getLigandsFromMultiStructures. Left this way in case we implement multiple ligand entries in the future. """ struct = struct.copy() # MATSCI-5406 self._cleanAtomProperties(struct) # Mark the coordinating atoms for sitenum, info in self.coordination_info.ligandSites(): struct.atom[info.entry_index].property[SITE_PROP] = sitenum # Remove hydrogens if requested if self.del_h: delatoms = [] for atom, site in self.coordinatingAtoms(struct): for neighbor in atom.bonded_atoms: if neighbor.element == 'H': delatoms.append(neighbor.index) struct.deleteAtoms(delatoms) # Split the ligand structures up if the coordinating sites are on # different molecules - must do this after deleting hydrogens just in # case that creates new molecules (MATSCI-9739) ligands = [struct] mols = {x.molecule_number for x, y in self.coordinatingAtoms(struct)} if len(mols) > 1: delatoms = [] mols = sorted(list(mols)) # We extract a new ligand structure for each molecule bound to the # metal after the first. This leaves the first bound molecule and # any spectator molecules (those not bound to the metal) in the # first structure. for molnum in mols[1:]: molecule = struct.molecule[molnum] delatoms.extend(molecule.getAtomIndices()) ligands.append(molecule.extractStructure()) struct.deleteAtoms(delatoms) return ligands
[docs] def coordinatingAtoms(self, struct): """ A generator over all the atoms in the structure that bind or will bind to the metal :type struct: `schrodinger.structure.Structure` :param struct: The structure containing the atoms. Can be either a ligand or the full complex. :rtype: `schrodinger.structure._StructureAtom`, int :return: An atom that binds/will bind to the metal and the coordination site it occupies/will occupy """ for atom in struct.atom: site = atom.property.get(SITE_PROP) if site is not None: yield atom, site
[docs] def fixLocations(self): """ Reorder the ideal locations list to correspond to the reordered coordination sites in the siteselectors. :rtype: list :return: The list of ideal coordination sitess reordered the to same order as shown in the GUI site selection diagrams. Each item of the list is an XYZ tuple giving the coordination site if the metal is at (0, 0, 0) """ raw_locations = buildcomplex.IDEAL_SLOTS[self.geometry] if self.geometry == buildcomplex.OCTAHEDRAL: order = [0, 1, 4, 3, 2, 5] elif self.geometry == buildcomplex.SQUARE_PLANAR: order = [0, 1, 3, 2] else: order = range(len(raw_locations)) return [raw_locations[x] for x in order]
[docs] def alignLigands(self): """ Align the ligands to good binding location, ensuring that the coordinating atoms are near (as possible) to the coordination sites and that the ligand is flipped to a reasonable orientation """ framework = structure.create_new_structure() for loc in self.locations + [(0.0, 0.0, 0.0)]: framework.addAtom('Du', *loc) for lig in self.ligands: siteatoms = [x[0] for x in self.coordinatingAtoms(lig)] if len(siteatoms) == 1: self._alignMonodentate(lig, siteatoms, framework) elif len(siteatoms) == 2: self._alignBidentate(lig, siteatoms, framework) else: self._alignManydentate(lig, siteatoms, framework)
def _alignMonodentate(self, ligand, siteatoms, framework): """ Align monodentate ligands to good binding location, ensuring that the anticipated bond from ligand to metal overlaps the ideal bond direction. Note that the rotation of the ligand about this bond is not optimized. :type ligands: list :param ligands: List of ligand structures :type siteatoms: list :param siteatoms: Each item is the _StructureAtom object for a coordinating atom :type framework: `schrodinger.structure.Structure` :param framework: A mock structure containing a dummy at each ideal coordination location (in the same order as the coordination sites) and a dummy at the metal atom position as the final atom. """ siteatom = siteatoms[0] # Add a dummy atom to the ligand structure at optimal mythical metal # position. Then we can superimpose that dummy onto the metal. site = (siteatom.index, 0) ligst = buildcomplex.Ligand(ligand, sites=[site], slots=[0]) dative_site = ligst.struct.atom[ligst.struct.atom_total].xyz dummy = ligand.addAtom('Du', *dative_site) # Now superimpose the atom-dummy_metal bond onto the framework pair1 = (siteatom.property[SITE_PROP] + 1, framework.atom_total) pair2 = (siteatom.index, dummy.index) rmsd.superimpose_bond(framework, pair1, ligand, pair2) ligand.deleteAtoms([dummy.index]) def _alignBidentate(self, ligand, siteatoms, framework): """ Align bidentate ligands to good binding location, ensuring that the coordinating atoms are near the coordination sites and that the ligand is flipped so that the bulk of the ligand points away from the metal. :type ligands: list :param ligands: List of ligand structures :type siteatoms: list :param siteatoms: Each item is the _StructureAtom object for a coordinating atom :type framework: `schrodinger.structure.Structure` :param framework: A mock structure containing a dummy at each ideal coordination location (in the same order as the coordination sites) and a dummy at the metal atom position as the final atom. """ indexes = [x.index for x in siteatoms] # Find the centroid of the atoms that connect the two sites. We want # this spot to be exactly opposite of the metal position. path = analyze.find_shortest_bond_path(ligand, *indexes) pcentroid = transform.get_centroid(ligand, atom_list=path)[:3] # Find the location that is the opposite of this centroid. That's where # we want the metal atom to be. Doing this ensures that the bulk of the # ligand points away from the metal rather than overlapping it. coords1 = numpy.array(siteatoms[0].xyz) coords2 = numpy.array(siteatoms[1].xyz) coordscent = numpy.array(pcentroid) vec1 = coords1 - coordscent vec2 = coords2 - coordscent vecsum = vec1 + vec2 opposite = coordscent + vecsum # Superimpose the two binding sites plus metal site dummy = ligand.addAtom('Du', *opposite) frame_trio = (framework.atom_total, siteatoms[0].property[SITE_PROP] + 1, siteatoms[1].property[SITE_PROP] + 1) lig_trio = (dummy.index, indexes[0], indexes[1]) rmsd.superimpose(framework, frame_trio, ligand, lig_trio, move_which=rmsd.CT) ligand.deleteAtoms([dummy.index]) def _alignManydentate(self, ligand, siteatoms, framework): """ Align tri or tetradentate ligands to good binding location, ensuring that the coordinating atoms are near the coordination sites. We do this by simply superimposing the atoms on their binding sites. :type ligands: list :param ligands: List of ligand structures :type siteatoms: list :param siteatoms: Each item is the _StructureAtom object for a coordinating atom :type framework: `schrodinger.structure.Structure` :param framework: A mock structure containing a dummy at each ideal coordination location (in the same order as the coordination sites) and a dummy at the metal atom position as the final atom. """ sitedexes = [x.index for x in siteatoms] framedexes = [x.property[SITE_PROP] + 1 for x in siteatoms] rmsd.superimpose(framework, framedexes, ligand, sitedexes, move_which=rmsd.CT)
[docs] def createBasicStructure(self): """ Create the basic structure of the complex - metal atom plus ligands without the ligands actually bound to the metal and without optimizing their positions :rtype: `schrodinger.structure.Structure` :return: The basic complex structure """ struct = structure.create_new_structure() metal_atom = struct.addAtom('C', 0.0, 0.0, 0.0) buildcomplex.transmute_atom(metal_atom, self.element) for ligand in self.ligands: struct.extend(ligand) return struct
[docs] def addCappingLigands(self): """ Add capping groups to the requested positions """ for index, info in self.coordination_info.cappedSites(): group, capper = FRAG_DICT[info] if not capper: # This slot is not occupied continue # Add an H atom at the cap position - this will be replaced by # the capper fragment to_atom = self.struct.addAtom('H', *self.locations[index]) self.struct.addBond(1, to_atom.index, 1) if capper == HYDROGEN: continue build.attach_fragment(self.struct, 1, to_atom.index, group, capper)
[docs] def bondLigandsToMetal(self): """ Bond the ligands to the metal atom """ for atom, site in self.coordinatingAtoms(self.struct): try: self.struct.addBond(1, atom.index, 0) except RuntimeError as err: # Runtime can be raised if one of the atoms has too many bonds issue = str(err).split('\n')[-2].strip() raise SculptingError( f'Could not bond ligand to metal:\n{issue}') buildcomplex.fix_metal_bond_orders(self.struct, 1)
[docs] def addIdealAnchors(self): """ Add dummy atoms at each of the ideal coordination locations """ for atom, site in self.coordinatingAtoms(self.struct): location = self.locations[site] anchor = self.struct.addAtom('Du', *location) atom.property[ANCHOR_PROP] = anchor.index
[docs] def minimizeIdealComplex(self, ligname): """ Minimize the complex using the passed in minimizer :type ligname: str :param ligname: The title of the ligand structure :rtype: `schrodinger.structutils.minimize.Minimzer` :return: The minimizer with structure and restraints set """ with minimize.minimizer_context(struct=self.struct, no_zob_restrain=True) as mizer: # Make sure the metal atom doesn't move mizer.addPosRestraint(1, 20000.) for atom, site in self.coordinatingAtoms(self.struct): anchor = self.struct.atom[atom.property[ANCHOR_PROP]] # Make sure dummies marking the ideal locations don't move mizer.addPosRestraint(anchor.index, 20000.) # Add the restraints that pull the actual atoms towards the ideal # locations, while implementing MATSCI-6955 it was found that # for eta bound ligands, represented with a central dummy atom # ZOB-ed to the haptic ring atoms, if the real-anchor dummy # atom pair is allowed arbitrary close then it triggers # a force field overlapping atoms error, so pass a small number # rather than zero mizer.addDistanceRestraint(atom.index, anchor.index, 200., 1e-3) # for eta bound ligands, represented with a central dummy atom # ZOB-ed to the haptic ring atoms, restrain the haptic system, # the 1.4 Ang. distance is a reference centroid to carbon distance # for benzene if atom.atomic_number == -2: neighbors = [ bond.atom2 for bond in atom.bond if bond.type == structure.BondType.Zero ] for neighbor in neighbors: mizer.addDistanceRestraint(atom.index, neighbor.index, 200., 1.4) mizer.addAngleRestraint(1, atom.index, neighbor.index, 20000., 90.) for neighbor_i, neighbor_j in itertools.permutations( neighbors, 2): angle = self.struct.measure(neighbor_i, atom, neighbor_j) mizer.addAngleRestraint(neighbor_i.index, atom.index, neighbor_j.index, 20000., angle) # It may not be necessary to run all the minimizations, but they are # fast and ensure the complex achieves ideal geometry. Definitely once # is not enough for good complex geometries from poor starting points. mizer.minimize() mizer.minimize() mizer.minimize()
[docs] def deleteAnchors(self): """ Delete the dummy atoms that mark the ideal coordination sites """ dummies = [ x.property[ANCHOR_PROP] for x, y in self.coordinatingAtoms(self.struct) ] self.struct.deleteAtoms(dummies)