Source code for schrodinger.application.matsci.buildcomplex2d

"""
Utilities for the 2D-to-3D `*sdf` to `*mae` file conversion for a
metal complex.

Copyright Schrodinger, LLC. All rights reserved.
"""

import argparse
import random
import re
import numpy
import itertools
from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import fast3d
from schrodinger.test import ioredirect
from schrodinger.utils import fileutils
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils import minimize
from schrodinger.structutils import ringspear
from schrodinger.structutils import rmsd
from schrodinger.structutils import transform
from schrodinger.application.matsci import buildcomplex
from schrodinger.application.matsci import jobutils
from schrodinger.application.matsci import parserutils
from schrodinger.application.matsci import etatoggle as tesu
from schrodinger.application.matsci import reaction_workflow_utils as rxnwfu
from schrodinger.application.matsci import sculptcomplex as scu
from schrodinger.application.matsci import swap_fragments_utils as sfu

my_random = random.Random()
my_random.seed(parserutils.RANDOM_SEED_DEFAULT)

COORDINATION_DICT = {buildcomplex.TETRAHEDRAL: 4}

# the following is used in Schrodinger's V3000 SDF writer as well
# as well as other programs
STD_V3000_SDF_V_LINE = '  0  0  0     0  0            999 V3000\n'

# instead of using OPLS_2005 FF which is used in the complex builder
# GUI use OPLS3 (see test2 in MATSCI-6325 and OMC002332 in MATSCI-6955
# which experience issues with OPLS_2005)
FFLD_VERSION = minimize.OPLS3

DEFAULT_EPSILON = 2  # kcal/mol


[docs]class DummiesToHydrogens: """ Context manager to temporarily convert dummy atoms to hydrogens. """
[docs] def __init__(self, st): # look for dummy atoms that have at most one single bond self.dummy_atoms = [] for atom in st.atom: if atom.atomic_number != ComplexSdfToMae.DUMMY_ATOMIC_NUMBER: continue state = False for bond in atom.bond: if bond.order == 1: if not state: state = True else: state = False break if state: self.dummy_atoms.append(atom)
def __enter__(self): for dummy_atom in self.dummy_atoms: dummy_atom.atomic_number = 1 def __exit__(self, *args): for dummy_atom in self.dummy_atoms: dummy_atom.atomic_number = ComplexSdfToMae.DUMMY_ATOMIC_NUMBER
[docs]class RemoveMetalBonds: """ Context manager to temporarily remove bonds to the metal atom in a complex. """
[docs] def __init__(self, st): self.st = st self.bonds = [ (bond.atom1, bond.atom2, bond.order) for bond in st.atom[1].bond ]
def __enter__(self): for idx, jdx, order in self.bonds: self.st.deleteBond(idx, jdx) def __exit__(self, *args): for idx, jdx, order in self.bonds: self.st.addBond(idx, jdx, order)
[docs]class ComplexSdfToMaeException(Exception): pass
# the eta parts of the following class are modeled after custom script # sdf2etacomplex.py (see MATSCI-4423)
[docs]class ComplexSdfToMae: """ Manage the 2D-to-3D `*sdf` to `*mae` file conversion for a metal complex. """ SD_PROP_PATTERN = re.compile(r'><(.*)>') SD_PROP_STARTER = 's_sd_' DUMMY_ATOMIC_NUMBER = -2 BIND_PROP = 'b_user_metal_neighbor' HAPTIC_ATOM_PROP = 'b_user_haptic_atom'
[docs] def __init__(self, sdf_file, logger=None): """ Create an instance. :type sdf_file: str :param sdf_file: the 2D `*sdf` file :type logger: logging.Logger or None :param logger: output logger or None if there isn't one """ self.sdf_file = sdf_file self.logger = logger self.st = self.read_sdf(self.sdf_file)
[docs] @staticmethod def read_sdf(sdf_file): """ Wrapper to read an `*sdf` file. :type sdf_file: str :param sdf_file: the `*sdf` file :raise ComplexSdfToMaeException: if there is an issue :rtype: `schrodinger.structure.Structure` :return: the structure """ sdf_file_type = type(sdf_file).__name__ with open(sdf_file) as afile: lines = afile.readlines() # try to read the file as is, a StopIteration will be raised if # there is a formatting issue, attempt to fix a known formatting # issue and read the newly formatted file, if there is still a # formatting issue then give up try: st = structure.Structure.read(sdf_file) # see MATSCI-8823 if st.atom_total == 0: raise StopIteration except StopIteration: with fileutils.tempfilename(prefix='tmp', suffix='.sdf') as tmp_sdf_file: with open(tmp_sdf_file, 'w') as afile: if not re.match(r'\s*\n', lines[0]): afile.write('\n') for line in lines: if 'V3000' in line: afile.write(STD_V3000_SDF_V_LINE) continue line = re.sub(r'^M\s+', r'M ', re.sub(r'^\s+', r'', line)) if not line: line = '\n' afile.write(line) try: st = structure.Structure.read(tmp_sdf_file) # see MATSCI-8823 if st.atom_total == 0: raise StopIteration except StopIteration: msg = (f'The {sdf_file} file has an invalid format.') # raise the following exception with no previous context, # this suppresses error messages from exceptions raised # during the raising of the exception that is currently # being caught raise ComplexSdfToMaeException(msg) from None except Exception as err: msg = str(err) raise except Exception as err: msg = str(err) raise # structure.Structure.read `*sdf` meta-data like "><X>\nY\n" is captured # as st.property['s_sd_X'] = 'Y', but go through the file and manually # set properties that haven't been set, for example like when 'Y' is a # single integer for idx, line in enumerate(list(lines)): match = re.match(ComplexSdfToMae.SD_PROP_PATTERN, line) if match: key = ComplexSdfToMae.SD_PROP_STARTER + match.group(1).replace( ' ', '_') if st.property.get(key) is None: value = lines[idx + 1].strip('\n') st.property[key] = value st.title = fileutils.get_basename(sdf_file) return st
def _getClosestRing(self, dummy_atom, all_ring_idxs): """ Return the atom indices of the ring that is closest to the given dummy atom. :type dummy_atom: `schrodinger.structure._StructureAtom` :param dummy_atom: the dummy atom :type all_ring_idxs: list :param all_ring_idxs: contains lists of atom indices for rings :rtype: list :return: the atom indices for the closest ring """ # handle the case where neighbor is in multiple rings _, _, neighbor = measure.get_shortest_distance(self.st, atoms=[dummy_atom.index]) closest_dist = numpy.inf closest_ring_idxs = [] for ring_idxs in all_ring_idxs: if neighbor in ring_idxs: avg_dist = numpy.mean([ self.st.measure(dummy_atom.index, idx) for idx in ring_idxs ]) if avg_dist < closest_dist: closest_dist = avg_dist closest_ring_idxs = ring_idxs return closest_ring_idxs def _getClosestNonRingEtaIdxs(self, dummy_atom): """ Return the atom indices of the non-ring eta atoms that are closest to the given dummy atom. :type dummy_atom: `schrodinger.structure._StructureAtom` :param dummy_atom: the dummy atom :rtype: list :return: the atom indices for the closest non-ring eta atoms """ _, _, neighbor = measure.get_shortest_distance(self.st, atoms=[dummy_atom.index]) neighbor = self.st.atom[neighbor] idxs = [neighbor.index] for bond in neighbor.bond: if bond.order > 1: idxs.append(bond.atom2.index) return idxs def _bondDummies(self): """ Bond dummy atoms to the corresponding ligands. """ all_ring_idxs = [ring.getAtomIndices() for ring in self.st.ring] for atom in self.st.atom: if not atom.property.get(self.BIND_PROP): continue if atom.atomic_number == self.DUMMY_ATOMIC_NUMBER: idxs = self._getClosestRing(atom, all_ring_idxs) for idx in idxs: ring_atom = self.st.atom[idx] # handle carbanions if ring_atom.atomic_number == 6: for bond in ring_atom.bond: if bond.order > 1: break else: ring_atom.formal_charge = -1 if not idxs: idxs = self._getClosestNonRingEtaIdxs(atom) if not idxs: continue for idx in idxs: haptic_atom = self.st.atom[idx] haptic_atom.property[self.HAPTIC_ATOM_PROP] = True # use zob bond to not take up a valence slot self.st.addBond(atom, haptic_atom, structure.BondType.Zero) else: # use single bond to take up a valence slot marker = self.st.addAtom('DU', *self.metal_xyz) self.st.addBond(atom.index, marker.index, structure.BondType.Single) def _defineMetalAttrs(self, idx): """ Define metal attributes. :type idx: int :param idx: the index """ metal_atom = self.st.atom[idx] self.metal_element = metal_atom.element self.metal_xyz = metal_atom.xyz
[docs] def getCenterIdx(self): """ Return the atom index that serves as the complex center, typically a metal atom. :raise ComplexSdfToMaeException: if there is an issue :rtype: int :return: the atom index that serves as the complex center """ metal_idxs = analyze.evaluate_asl(self.st, 'metals') if not metal_idxs: centroid = transform.get_centroid(self.st)[:-1] # from sdf reader complexes with eta bound ligands come # with a dummy atom representation where the dummy is only # bound to the metal pairs = [] for atom in self.st.atom: if atom.atomic_number == -2: neighbors = list(atom.bonded_atoms) metal_idxs = [neighbors[0].index] break pairs.append((atom.index, transform.get_vector_magnitude( numpy.array(atom.xyz) - centroid))) else: # pick the atom closest to the centroid that when deleted # breaks the molecule into multiple molecules pairs = sorted(pairs, key=lambda x: x[1]) for idx in list(zip(*pairs))[0]: tst = self.st.copy() tst.deleteAtoms([idx]) if tst.mol_total > 1: metal_idxs = [idx] break if not metal_idxs: msg = ('Complex must contain a single metal center.') raise ComplexSdfToMaeException(msg) metal_idxs = sorted(metal_idxs, key=lambda x: self.st.atom[x].atomic_number, reverse=True) return metal_idxs[0]
[docs] def prepare(self): """ Prepare the structure. """ metal_idx = self.getCenterIdx() metal_atom = self.st.atom[metal_idx] self._defineMetalAttrs(metal_atom.index) # mark neighboring atoms for atom in metal_atom.bonded_atoms: atom.property[self.BIND_PROP] = True # make ligand molecules self.st.deleteAtoms([metal_atom]) # handle ligand dummy atom bonding self._bondDummies()
def _getBindingSites(self, st): """ Return binding sites for the given structure. :type st: `schrodinger.structure.Structure` :param st: the structure :rtype: list :return: contains atom index pair tuples, the first index is for the atom that binds to the metal and is negated for eta-coordinated ligands, the second index is for the atom that represents the metal positioning relative to the ligand and is 0 for eta-coordinated ligands """ sites = [] for atom in st.atom: if not atom.property.get(self.BIND_PROP): continue if atom.atomic_number == self.DUMMY_ATOMIC_NUMBER: sign, marker_idx = -1, 0 else: for neighbor in atom.bonded_atoms: if neighbor.atomic_number == self.DUMMY_ATOMIC_NUMBER: sign, marker_idx = 1, neighbor.index break sites.append((sign * atom.index, marker_idx)) return sites
[docs] def addHydrogens(self, st): """ Add hydrogens to the given structure. :type st: `schrodinger.structure.Structure` :param st: the structure """ # see MATSCI-7031 - build.add_hydrogens doesn't necessarily # add hydrogens to haptic carbanions idxs = [] for atom in st.atom: if atom.property.get(self.HAPTIC_ATOM_PROP) and \ atom.formal_charge == -1 and atom.atomic_number == 6: n_bonds = sum(1 for bond in atom.bond if bond.type != structure.BondType.Zero) if n_bonds == 2: idxs.append(atom.index) for idx in idxs: atom = st.atom[idx] summed_bond_vec = numpy.array([0., 0., 0.]) for neighbor in atom.bonded_atoms: summed_bond_vec += numpy.array(neighbor.xyz) - numpy.array( atom.xyz) summed_bond_vec = numpy.array(atom.xyz) - summed_bond_vec h_atom = st.addAtom('H', *summed_bond_vec, 'white') st.addBond(atom.index, h_atom.index, structure.BondType.Single) # it is not problematic to add hydrogens to structures # with dummy atoms build.add_hydrogens(st)
def _getPreparedLigand(self, st): """ Return a prepared ligand. :type st: `schrodinger.structure.Structure` :param st: the ligand structure :rtype: `schrodinger.structure.Structure` :return: the prepared ligand structure """ self.addHydrogens(st) # adopt the same strategy used in builderwidgets.ItemRow.setStructure min_energy = numpy.inf min_energy_struct = st for count in range(10): scopy = st.copy() # break symmetry for atom in scopy.atom: atom.z = (my_random.random() - 0.5) / 10. with DummiesToHydrogens(scopy): with ioredirect.IOSilence(): try: mizer = minimize.Minimizer(ffld_version=FFLD_VERSION, struct=scopy, cleanup=False) min_res = mizer.minimize() except ValueError: pass else: if min_res.potential_energy < min_energy: min_energy = min_res.potential_energy min_energy_struct = scopy return min_energy_struct
[docs] def build(self): """ Build the complex. :raise ComplexSdfToMaeException: if there is an issue :rtype: `schrodinger.structure.Structure` :return: the complex """ try: return self.sculptComplexBuild() except ComplexSdfToMaeException as err: if self.logger: msg = ('Sculpt complex has failed with the following error ' 'message. Trying complex builder instead.') self.logger.info(msg) self.logger.warning(str(err)) # it is not clear that there is any real benefit to falling back # on complex builder but keeping the stub for now try: return self.buildComplexBuild() except ComplexSdfToMaeException as err: if self.logger: msg = ('Complex builder has failed with the following error ' 'message.') self.logger.info(msg) self.logger.error(str(err)) raise
[docs] def fixRingSpears(self, st): """ Fix ring-spears. :type st: `schrodinger.structure.Structure` :param st: the structure """ if ringspear.check_for_spears(st, return_bool=True): st.generate3dConformation(require_stereo=False) # uses fast3d if self.logger and ringspear.check_for_spears(st, return_bool=True): msg = ('At least a single ring-spear has been found in the 3D ' 'structure.') self.logger.warning(msg)
[docs] def updateProperties(self, st): """ Update properties on the given structure. :type st: `schrodinger.structure.Structure` :param st: the structure """ for key, value in self.st.property.items(): if key.startswith(self.SD_PROP_STARTER): st.property[key] = value
[docs] def run(self): """ Run. :rtype: `schrodinger.structure.Structure` :return: the structure """ self.prepare() st = self.build() self.fixRingSpears(st) self.updateProperties(st) return st
[docs] def write(self, mae_file=None): """ Write. :type mae_file: str :param mae_file: the 3D `*mae` file :rtype: str :return: the 3D `*mae` file """ mae_file = mae_file or fileutils.get_basename(self.sdf_file) + '.mae' st = self.run() st.write(mae_file) return mae_file
[docs] def sculptComplexBuild(self, sites=None, geometry=None, optimize=True): """ Build the complex using sculpt complex. :type sites: list or None :param sites: contains atom index pair tuples, the first index is for the atom that binds to the metal and is negated for eta-coordinated ligands, the second index is for the atom that represents the metal positioning relative to the ligand and is 0 for eta-coordinated ligands, the order of sites in this list determines the coordination geometry of the complex, if None an arbitrarily ordered list of sites will be determined from the structure :type geometry: str or None :param geometry: the VSEPR geometry from the buildcomplex geometry constants, if None uses the sculpt complex convention of using the last geometry for a given number of slots coming from the order in buildcomplex.IDEAL_SLOTS :type optimize: bool :param optimize: whether to perform a standard geometry optimization on the final sculpted complex :raise ComplexSdfToMaeException: if there is an issue :rtype: `schrodinger.structure.Structure` :return: the complex """ st = self.st.copy() self.addHydrogens(st) # remove potential center markers prepared for build complex, # site indexing doesn't need adjusting if not sites: sites = self._getBindingSites(st) to_remove = [site[1] for site in sites if site[1]] if to_remove: st.deleteAtoms(to_remove) # use first isomer and arbitrary site ordering for now coordinators = ','.join(str(abs(site[0])) for site in sites) try: coordination_info, geometries = scu.validate_coord_flag( coordinators) except argparse.ArgumentTypeError as err: raise ComplexSdfToMaeException(str(err)) if geometry: assert geometry in geometries else: # the convention for sculpt complex has been to use the last # geometry for a given number of slots coming from the order # in buildcomplex.IDEAL_SLOTS geometry = geometries[-1] # del_h has implications regarding the final bond orders of metal-ligand # bonds, for example for a metal-carbon bond, like M-CH2-phenyl, using del_h # will produce a metal carbyne (triple bond) while not using it will not, # using False by default for now del_h = False title = '' enumerating = False obj = scu.Sculptor(self.metal_element, coordination_info, geometry, del_h, title, enumerating) try: st = obj.sculpt(st, optimize=optimize) except scu.SculptingError as err: raise ComplexSdfToMaeException(str(err)) return st
[docs] def buildComplexBuild(self): """ Build the complex using build complex. :raise ComplexSdfToMaeException: if there is an issue :rtype: `schrodinger.structure.Structure` :return: the complex """ builder = buildcomplex.ComplexBuilder(metal=self.metal_element, geometry=buildcomplex.TETRAHEDRAL, homoleptic=False, isomer=None) total_n_sites = 0 for mol in self.st.molecule: st = mol.extractStructure() sites = self._getBindingSites(st) n_sites = len(sites) if n_sites > 2: msg = ('Only mono- and bi-dentate ligands are supported.') raise ComplexSdfToMaeException(msg) total_n_sites += n_sites if total_n_sites > COORDINATION_DICT[buildcomplex.TETRAHEDRAL]: msg = ('Complex must have tetrahedral coordination.') raise ComplexSdfToMaeException(msg) st = self._getPreparedLigand(st) if n_sites == 1: builder.addMonodentateLigand(st, sites[0]) elif n_sites == 2: builder.addBidentateLigand(st, sites) try: st = builder.createComplex(force=True) except ValueError as err: raise ComplexSdfToMaeException(str(err)) with ioredirect.IOSilence(): try: buildcomplex.minimize_complex(st, forcefield=FFLD_VERSION) except ValueError: pass return st
[docs] @staticmethod def type_cast_sdf_file(sdf_file): """ Type cast the 2D `*sdf` file. :type sdf_file: str :param sdf_file: the 2D `*sdf` file :raise argparse.ArgumentTypeError: is there is an issue :rtype: str :return: the 2D `*sdf` file """ sdf_file = parserutils.type_file(sdf_file) try: ComplexSdfToMae.read_sdf(sdf_file) except ComplexSdfToMaeException as err: raise argparse.ArgumentTypeError(str(err)) return sdf_file
[docs] def getAvailableGeometries(self, geometry=None): """ Return the available geometries. :type geometry: str or None :param geometry: the specific coordination geometry to use, if not specified all available geometries will be considered :raise ComplexSdfToMaeException: if there is an issue :rtype: list[str] :return: contains available geometries """ self.prepare() sites = self._getBindingSites(self.st) geometries = buildcomplex.SLOTS_TO_GEOMS.get(len(sites)) if not geometries: msg = ( f'Complexes with {len(sites)} coordination sites are not supported.' ) if self.logger: self.logger.error(msg) raise ComplexSdfToMaeException(msg) if geometry: # geometry may have been given in pretty or cli format so convert # to pretty now geometry = buildcomplex.get_pretty_geom_str(geometry) if geometry not in geometries: msg = ( f'The specified geometry of {geometry} is not available for ' f'complexes with {len(sites)} coordination sites.') if self.logger: self.logger.error(msg) raise ComplexSdfToMaeException(msg) geometries = [geometry] return geometries
[docs] def get3DIsomers(self, geometry=None, out_rep=None, epsilon=DEFAULT_EPSILON): """ Return 3D isomers. :type geometry: str or None :param geometry: the specific coordination geometry to use, if not specified all available geometries will be considered :type out_rep: str or None :param out_rep: if None then the conversion is to the opposite of the given representation, eta to centroid or centroid to eta, if a string then must be either module constant parserutils.CENTROID or parserutils.ETA in which case the conversion will always provide an output representation of the given type :type epsilon: float :param epsilon: the energy window in kcal/mol for binning isomers :raise ComplexSdfToMaeException: if there is an issue :rtype: list[`schrodinger.structure.Structure`] :return: contains 3D isomers """ geometries = self.getAvailableGeometries(geometry=geometry) sites = self._getBindingSites(self.st) # see MATSCI-10581 - strictly this function needs to not only return isomers but # possibly also conformers, for example as in A-metal-B (180 deg.) vs A-metal-B # (90 deg.) where A and B are monodentate ligands, the geometry list misses some # coordinations, for example square pyramidal, for now use the following algorithm, # for a given number of slots (N) with locations according to the given geometry, # consider all possible permutations of the N sites onto the N slots, bin the # energies using the epsilon window and collect the lowest energy structure # from each bin, this is time consuming but ensures a variety of output # structures for ligands of any denticity f3d_engine = fast3d.SingleConformerEngine() all_sts = [] for geometry in geometries: geom = geometry.replace(' ', '_').lower() sts = [] for these_sites in itertools.permutations(sites, len(sites)): try: st = self.sculptComplexBuild(sites=these_sites, geometry=geometry, optimize=True) except ComplexSdfToMaeException: continue # see MATSCI-11806 where it was found that even with geometry # optimization it is possible that scuplt complex lands us in a # bad local minimum, for example a square pyramidal sp3 carbon, # fast3d single conformer engine fixes that, the central metal # atom is always the first atom core_idxs = [1] + [ atom.index for atom in st.atom[1].bonded_atoms ] f3d_engine.run(st, core_idxs) self.fixRingSpears(st) st.title = f'{self.st.title}-{geom}' sts.append(st) if not sts: continue try: sts = analyze.get_low_energy_reps(sts, epsilon) except ValueError: continue all_sts.extend(sts) # only fail if no structures could be produced if not all_sts: msg = ('Sculpt complex has failed for all coordination geometries.') if self.logger: self.logger.error(msg) raise ComplexSdfToMaeException(msg) all_sts = sorted(all_sts, key=lambda x: x.property.get(mm.OPLS_POTENTIAL_ENERGY)) for idx, st in enumerate(all_sts, 1): st.title += f'-{idx}' self.updateProperties(st) tesu.toggle_structure(st, out_rep=out_rep) return all_sts
[docs]class ComplexSdfToRxnWF(ComplexSdfToMae): """ Manage the 2D-to-3D `*sdf` to `*_rxnwf_mae` file conversion for a metal complex. """ R_GROUP_SD_KEY_PATTERN = re.compile(r'R(\d+)(.*)') # doesn't match \n SITE_ATOM_KEY = 'i_matsci_Reaction_Workflow_Enumeration_Atom' REPLACE_ATOM_SD_KEY = 'Replace Indices' SUPERPOSITION_ATOM_SD_KEY = 'Superimpose Indices'
[docs] def __init__(self, sdf_file, reference_rxnwf_file=None, handle_rotamers=True, logger=None): """ Create an instance. :type sdf_file: str :param sdf_file: the 2D `*sdf` file :type reference_rxnwf_file: str :param reference_rxnwf_file: the reaction workflow file containing the reference structures, used for finding an optimal 3D geometry :type handle_rotamers: bool :param handle_rotamers: whether to rotate mono-dentate haptic ligands containing superposition atoms such that they maximally overlap with the reference reaction workflow :type logger: logging.Logger or None :param logger: output logger or None if there isn't one """ super().__init__(sdf_file, logger=logger) self.reference_rxnwf_file = reference_rxnwf_file self.handle_rotamers = handle_rotamers self.rgroup_files, sites, replace_idxs, superposition_idxs = \ self.parse_novel_sdf_file(sdf_file) # mark site atoms and collect finalized data after the complex # build so as to avoid index tracking for site_idx, rgroup_idx in sites: atom = self.st.atom[site_idx] atom.property[self.SITE_ATOM_KEY] = rgroup_idx for atom in self.st.atom: if atom.index not in replace_idxs: atom.property[rxnwfu.KEEP_ATOM_KEY] = True for idx, superposition_idx in enumerate(superposition_idxs, 1): atom = self.st.atom[superposition_idx] atom.property[rxnwfu.SUPERPOSITION_ATOM_KEY] = idx
[docs] @staticmethod def parse_novel_sdf_file(sdf_file): """ Parse the novel 2D `*sdf` file. :type sdf_file: str :param sdf_file: the novel 2D `*sdf` file :return: list, list, list, list :return: (1) contains R-group file data as [R-group file, R-group index] pairs, (2) contains site data as [site "to" index, R-group index] pairs, (3) contains replace indicies, (4) contains superposition indices """ st = ComplexSdfToMae.read_sdf(sdf_file) sd_prop_starter = ComplexSdfToMae.SD_PROP_STARTER r_group_sd_key_pattern = ComplexSdfToRxnWF.R_GROUP_SD_KEY_PATTERN replace_atom_sd_key = ComplexSdfToRxnWF.REPLACE_ATOM_SD_KEY.replace( ' ', '_') superposition_atom_sd_key = ComplexSdfToRxnWF.SUPERPOSITION_ATOM_SD_KEY.replace( ' ', '_') rgroup_files = [] sites = [] replace_idxs = [] superposition_idxs = [] for key, value in st.property.items(): if not key.startswith(sd_prop_starter): continue key = key.replace(sd_prop_starter, '') match = re.match(r_group_sd_key_pattern, key) if match: rgroup_idx = int(match.group(1)) if not match.group(2): rgroup_files.append([value.strip(), rgroup_idx]) continue else: rgroup_idx = None try: idxs = list(map(int, value.replace(' ', '').split(','))) except ValueError: continue if rgroup_idx is not None: sites.extend([[idx, rgroup_idx] for idx in idxs]) elif key.startswith(replace_atom_sd_key): replace_idxs.extend(idxs) elif key.startswith(superposition_atom_sd_key): superposition_idxs.extend(idxs) return rgroup_files, sites, replace_idxs, superposition_idxs
[docs] @staticmethod def type_cast_novel_sdf_file(sdf_file): """ Type cast the novel 2D `*sdf` file. :type sdf_file: str :param sdf_file: the novel 2D `*sdf` file :raise argparse.ArgumentTypeError: is there is an issue :rtype: str :return: the novel 2D `*sdf` file """ sdf_file = parserutils.type_file(sdf_file) replace_atom_sd_key = ComplexSdfToRxnWF.REPLACE_ATOM_SD_KEY superposition_atom_sd_key = ComplexSdfToRxnWF.SUPERPOSITION_ATOM_SD_KEY try: rgroup_files, sites, replace_idxs, superposition_idxs = \ ComplexSdfToRxnWF.parse_novel_sdf_file(sdf_file) except ComplexSdfToMaeException as err: raise argparse.ArgumentTypeError(str(err)) for rgroup_file in rgroup_files: parserutils.type_file(rgroup_file[0]) if not replace_idxs or not superposition_idxs: msg = (f'Input {sdf_file} is missing at least one of the required ' f'meta-data: "{replace_atom_sd_key}" and ' f'"{superposition_atom_sd_key}".') raise argparse.ArgumentTypeError(msg) if len(superposition_idxs) < 3: msg = ( f'The "{superposition_atom_sd_key}" meta-data must define at ' 'least 3 indices.') raise argparse.ArgumentTypeError(msg) return sdf_file
def _defineMetalAttrs(self, idx): """ Define metal attributes. :type idx: int :param idx: the index """ super()._defineMetalAttrs(idx) metal_atom = self.st.atom[idx] self.metal_is_keep_atom = metal_atom.property.get(rxnwfu.KEEP_ATOM_KEY) self.metal_superposition_idx = metal_atom.property.get( rxnwfu.SUPERPOSITION_ATOM_KEY)
[docs] def updateProperties(self, st): """ Update properties on the given structure. :type st: `schrodinger.structure.Structure` :param st: the structure """ st.property[rxnwfu.REACTION_WF_STRUCTURE_KEY] = True st.property[rxnwfu.CONFORMERS_GROUP_KEY] = 'conformer_group_1' st.property[rxnwfu.SIBLING_GROUP_KEY] = 'sibling_group_1' metal_atom = st.atom[1] if self.metal_is_keep_atom: metal_atom.property[rxnwfu.KEEP_ATOM_KEY] = True if self.metal_superposition_idx: metal_atom.property[ rxnwfu.SUPERPOSITION_ATOM_KEY] = self.metal_superposition_idx for atom in st.atom: if atom.property.get(rxnwfu.KEEP_ATOM_KEY): for neighbor in atom.bonded_atoms: if neighbor.atomic_number == 1: neighbor.property[rxnwfu.KEEP_ATOM_KEY] = True
[docs] def buildBestRotamer(self, nov_st): """ Build the best rotamer: :type nov_st: `schrodinger.structure.Structure` :param nov_st: the novel structure :raise ComplexSdfToMaeException: if there is an issue """ if not self.reference_rxnwf_file: return # get a representative reference structure that has superposition # atoms for ref_st in structure.StructureReader(self.reference_rxnwf_file): ref_all_super_idxs = sfu.get_superposition_idxs(ref_st) if ref_all_super_idxs: break else: return nov_all_super_idxs = sfu.get_superposition_idxs(nov_st) if not nov_all_super_idxs: return alen = len(nov_all_super_idxs) if alen != len(ref_all_super_idxs) or alen < 3: msg = ('The number of novel and reference superposition ' 'atoms must be equivalent and >= 3.') raise ComplexSdfToMaeException(msg) # make ligand molecules tmp_nov_st = nov_st.copy() tmp_nov_st.deleteAtoms([1]) # collect ligand structures that need rotation mols = [] for mol in tmp_nov_st.molecule: st = mol.extractStructure() haptic_idxs = [ atom.index for atom in st.atom if atom.property.get(self.HAPTIC_ATOM_PROP) ] haptic_idx_groups = analyze.group_by_connectivity(st, haptic_idxs) if len(haptic_idx_groups) == 1: if set(haptic_idx_groups[0]).intersection( sfu.get_superposition_idxs(st)): mols.append(mol) if not mols: return # get rough starting position of the entire novel structure by superposing # on the metal atom rmsd.superimpose(ref_st, [1], nov_st, [1], use_symmetry=False, move_which=rmsd.CT) nov_to_ref_idx_map = dict(zip(nov_all_super_idxs, ref_all_super_idxs)) # if a mono-dentate ligand molecule has haptic superposition atoms then # superpose the ligand molecule in the novel structure onto the reference # structure using all 1, 2, or >= 3 novel-reference superposition pairs # FIXME - a single atom needs special handling for mol in mols: st = mol.extractStructure() # get the map from ligand indices to original novel complex indices # (including the metal that was removed) amap = dict( zip(range(1, st.atom_total + 1), [i + 1 for i in mol.getAtomIndices()])) nov_super_idxs = [amap[i] for i in sfu.get_superposition_idxs(st)] ref_super_idxs = [nov_to_ref_idx_map[i] for i in nov_super_idxs] # rmsd.MOLECULES includes zero-order bonds so temporarily remove # metal-ligand bonds with RemoveMetalBonds(nov_st): rmsd.superimpose(ref_st, ref_super_idxs, nov_st, nov_super_idxs, use_symmetry=False, move_which=rmsd.MOLECULES) # raise an error if the rmsd over all superposition atoms # is too large, for example perhaps due to a bad choice of # reference rxnwf file, etc. tmp_nov_st = nov_st.copy() armsd = rmsd.superimpose(ref_st, ref_all_super_idxs, tmp_nov_st, nov_all_super_idxs, use_symmetry=False, move_which=rmsd.CT) thresh = 0.5 # Angstrom if armsd > thresh: msg = ('During the 2D-to-3D *sdf to `*mae` conversion ' 'mono-dentate haptic ligands in the novel structure ' 'have been rotated so that their superposition atoms ' 'maximally overlap with those from the given reference ' 'reaction workflow file. However the final rmsd over ' f'all superposition atoms remains larger than {thresh} ' 'indicating a bad choice of reference reaction workflow.') raise ComplexSdfToMaeException(msg)
[docs] def run(self): """ Run. :rtype: `schrodinger.structure.Structure` :return: the structure """ self.prepare() st = self.build() self.updateProperties(st) if self.handle_rotamers: self.buildBestRotamer(st) self.fixRingSpears(st) return st
[docs] def write(self, rxnwf_file=None): """ Write. :type rxnwf_file: str :param rxnwf_file: the 3D `*_rxnwf.mae` file :rtype: str, list :return: the 3D `*_rxnwf.mae` file, contains for each site a list of data `[from_idx, to_idx, rgroup_idx]` """ rxnwf_file = rxnwf_file or fileutils.get_basename(self.sdf_file) + \ rxnwfu.REACTION_WF_TAG + '.mae' st = self.run() sites = [] for atom in st.atom: rgroup_idx = atom.property.get(self.SITE_ATOM_KEY) if rgroup_idx is not None: for neighbor in atom.bonded_atoms: if neighbor.atomic_number != 1: sites.append([neighbor.index, atom.index, rgroup_idx]) break st.write(rxnwf_file) return rxnwf_file, sites
[docs]def convert_sdf_to_rxnwf(sdf_file, reference_rxnwf_file, handle_rotamers=True, output_rxnwf_file_name=None, logger=None): """ Convert the given single structure 2D `*sdf` file to a 3D `*_rxnwf.mae` file. :type sdf_file: str :param sdf_file: the `*sdf` file :type reference_rxnwf_file: str :param reference_rxnwf_file: the reaction workflow file containing the reference structures, used for finding an optimal 3D geometry :type handle_rotamers: bool :param handle_rotamers: whether to rotate mono-dentate haptic ligands containing superposition atoms such that they maximally overlap with the reference reaction workflow :type output_rxnwf_file_name: str :param output_rxnwf_file_name: the output reaction workflow file name :type logger: logging.Logger or None :param logger: output logger or None if there isn't one :rtype: str, list, list :return: (1) the 3D `*_rxnwf.mae` file, (2) contains R-group file data as [R-group file, R-group index] pairs, (3) contains for each site a list of data [from_idx, to_idx, rgroup_idx] """ obj = ComplexSdfToRxnWF(sdf_file, reference_rxnwf_file=reference_rxnwf_file, handle_rotamers=handle_rotamers, logger=logger) rxnwf_file, sites = obj.write(rxnwf_file=output_rxnwf_file_name) jobutils.add_outfile_to_backend(rxnwf_file, wait=False) return rxnwf_file, obj.rgroup_files, sites
[docs]def get_3d_isomers(sdf_file, geometry=None, out_rep=None, epsilon=DEFAULT_EPSILON, logger=None): """ Return 3D isomers for the given 2D `*sdf` file of a transition metal complex. :type sdf_file: str :param sdf_file: the `*sdf` file :type geometry: str or None :param geometry: the specific coordination geometry to use, if not specified all available geometries will be considered :type out_rep: str or None :param out_rep: if None then the conversion is to the opposite of the given representation, eta to centroid or centroid to eta, if a string then must be either module constant parserutils.CENTROID or parserutils.ETA in which case the conversion will always provide an output representation of the given type :type epsilon: float :param epsilon: the energy window in kcal/mol for binning isomers :type logger: logging.Logger or None :param logger: output logger or None if there isn't one :rtype: list[`schrodinger.structure.Structure`] :return: contains 3D isomers """ obj = ComplexSdfToMae(sdf_file, logger=logger) return obj.get3DIsomers(geometry=geometry, out_rep=out_rep, epsilon=epsilon)