Source code for schrodinger.structutils.ringspear

"""
Functions and classes useful for finding ring spears. A ring spear is when a
bond between two atoms external to a ring passes through the face of the ring.
The bond may be a single bond or a higher-order bond, and it may be cyclic or
acyclic. Thus, ring catenation is also found.

To find ring spears within a structure, use::

    check_for_spears(struct)

To find ring spears where struct2 spears struct1, use::

    check_for_spears(struct1, spear_struct=struct2)

To find ring spears, checking only atoms in list "atomlist" as spears, use::

    check_for_spears(struct, atoms=atomlist)

Ring-finding is by far the most expensive operation, so the check_for_spears
function allows rings to be passed in as `schrodinger.structure._Ring` objects
if they are already known - or if only a subset of the structure's rings should
be used. For a 7000 atom structure with 270 rings, ring-finding took ~ 90% of
the total computation time.

Copyright Schrodinger, LLC. All rights reserved.
"""

import math

import numpy
from scipy.spatial import distance

from schrodinger.application.matsci import msutils
from schrodinger.infra import structure as infrastructure
from schrodinger.structutils import measure

ROUGH_CUT_DISTANCE = 4.0
""" The default distance from a ring centroid at which atoms are eliminated from
spearing consideration """


[docs]def create_pbc(pbc): """ Convert pbc in list form to infrastructure.PBC object. :type pbc: infrastructure.PBC or list :param pbc: If periodic boundary conditions should be used, provide either an infrastructure.PBC object or the parameters to construct one. Allowed PBC constructors:: * a, b, c : box lengths * a, b, c, alpha, beta, gamma box : box lengths and angles * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors :rtype: `schrodinger.infra.structure.PBC` :return: an infrastructure.PBC object. """ if isinstance(pbc, infrastructure.PBC): return pbc pbc = list(map(float, pbc)) return infrastructure.PBC(*pbc)
[docs]def create_distance_cell(struct, dist, pbc=None): """ Create a infrastructure DistanceCell that obeys the given periodic boundary size. :type struct: `schrodinger.structure.Structure` :param struct: The structure to create the distance cell for :type dist: float :param dist: The distance threshold for the distance cell :type pbc: None, infrastructure.PBC, or list :param pbc: If periodic boundary conditions should be used, provide either an infrastructure.PBC object or the parameters to construct one. Allowed PBC constructors:: * a, b, c : box lengths * a, b, c, alpha, beta, gamma box : box lengths and angles * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors :rtype: (`schrodinger.infra.structure.DistanceCell`, `schrodinger.infra.structure.PBC` or None) :return: A distance cell object that obeys the given PBC conditions. Note that this is a different type of object than an mmct distance cell and has a completely different API. The second value returned is the PBC object used to create the distance cell or None if no PBC was used. :raises ValueError: If the pbc needs to be constructed and the parameters don't match an available constructor """ if pbc: pbc = create_pbc(pbc) cell = infrastructure.DistanceCell(struct.handle, dist, pbc) else: cell = infrastructure.DistanceCell(struct.handle, dist) return cell, pbc
[docs]class Spear(object): """ Contains the atom indexes involved in a ring spear and formats the data in a user-readable string """
[docs] def __init__(self, spear_indexes, ring_indexes): """ Create a Spear object :type spear_indexes: iterable :param spear_indexes: The indexes of the two atoms that make the bond that spears the ring :type ring_indexes: iterable :param ring_indexes: The atom indexes of the ring that is speared """ self.spear_indexes = spear_indexes """ List of atom indexes that make the spearing bond """ self.ring_indexes = ring_indexes """ List of atom indexes of the speared ring """
def __repr__(self): spears = ', '.join([str(x) for x in self.spear_indexes]) rings = ', '.join([str(x) for x in self.ring_indexes]) return 'Spear atoms: %s; Ring atoms: %s' % (spears, rings)
[docs]class SpearRing(object): """ Computes and holds information about a ring, and finds bonds that spear that ring. Note: Objects of this class do not maintain pointers to either the Structure object or the _Ring object that were used to create them. """ ORIG_ATOM_INDEX = 'i_matsci_orig_atom_spearring_index'
[docs] def __init__(self, struct, ring_indexes, pbc=None): """ Create a SpearRing object :type struct: `schrodinger.structure.Structure` :param struct: The structure that contains this ring :param list ring_indexes: Ring indexes :type pbc: `schrodinger.infra.structure.PBC` :param pbc: The periodic boundary condition to use if any or None if there is no PBC to use """ self.ring_indexes = list(ring_indexes) self.ring_size = len(self.ring_indexes) """ The atom indexes of the ring atoms """ self.pbc = pbc """ The periodic boundary condition to use if any """ self.centroid, self.pbc_ring_coords = self.getRingCentroid(struct) """ The ring centroid and set of ring coordinates that keep the ring together even if it was originally broken across the PBC """ data = self.getNormalsAndRadius(struct) self.origin_normal = data[0] """ The normal based at the ring centroid """ self.radius = data[2] """ The largest centroid-ring atom distance """
[docs] def getRingCentroid(self, struct): """ Get the ring centroid. Accounts for the fact that the ring might be broken across the periodic boundary condition :type struct: `schrodinger.structure.Structure` :param struct: The structure that contains this ring :rtype: `numpy.array`, `numpy.array` :return: A 3-float numpy array giving the [X, Y, Z] coordinates of the ring centroid, and a numpy array of the XYZ coordinates of each ring atom taking into account the PBC so that the coordinates are not split by the PBC. The coordinate list is in the same order as the ring_indexes property. """ if self.pbc: coords = self.pbc.getNearestImages(struct.handle, self.ring_indexes, self.ring_indexes[0]) else: coords = [struct.atom[x].xyz for x in self.ring_indexes] cxyz = numpy.array(coords) centroid = numpy.average(cxyz, 0) return centroid, cxyz
[docs] def getNormalsAndRadius(self, struct): """ Find the average normal vector to the ring plane and compute the radius of the ring. The average normal vector is the average of the normal vectors computed for each pair of adjacent ring atoms and the ring centroid. The radius is defined as the largest centroid-ring atom distance :type struct: `schrodinger.structure.Structure` :param struct: The structure that contains this ring :rtype: numpy.array, numpy.array, float :return: The average normal vector to the plane of the ring translated so its base is at the origin, the average normal vector to the plane of the ring translated so its base is at the ring centroid, the radius of the ring. """ atom_vectors = [x - self.centroid for x in self.pbc_ring_coords] radius = max([math.sqrt(sum(x * x)) for x in atom_vectors]) abs_normal = measure.fit_plane_to_points(numpy.array(atom_vectors)) relative_normal = self.centroid + abs_normal return abs_normal, relative_normal, radius
[docs] def findSpears(self, orig_struct, atoms=None, is_ring_struct=True, distorted=False, first_only=True, cell=None, dist=ROUGH_CUT_DISTANCE, return_bool=False): """ Check for ring spears - bonds that pass through the face of the ring. It does not matter if the spearing bond is a single or multiple bond, or if it is part of another ring. :type orig_struct: `schrodinger.structure.Structure` :param orig_struct: The structure to check if any bonds spear this ring in ring_struct. :type atoms: list :param atoms: List of atom indexes to check to see if they spear this ring. :type is_ring_struct: bool :param is_ring_struct: Whether orig_struct contains this ring. If True, the atoms with the same index as the indexes of this ring will not be considered for spearing calculations. :type distorted: bool :param distorted: If False (default), the structure will be assumed to have reasonable bond lengths, and optimizations will be used to dramatically speed up spear-finding in large structures by eliminating atoms that cannot possibly spear this ring. If True, atom distances will not be used to optimize spear-finding - significantly increasing the time required. :type first_only: bool :param first_only: Whether to return after finding the first ring spear, or whether all ring spears should be found. :type cell: `schrodinger.infra.structure.DistanceCell` :param cell: `schrodinger.infra.structure.DistanceCell` object created using `pbc`. cell is used for rough-cutting atoms too far away from the ring. If not provided, a cell will be created based on the value (or lack of value) of `pbc`. Pre-creating the cell for a structure and passing it to this method for all rings saves significant time. See also the `pbc` parameter. :type dist: float :param dist: The distance from the ring centroid at which atoms will be rough cut and eliminated from spearing consideration. This is only used if cell is not provided. :type return_bool: bool :param return_bool: If True, return bool instead of list :rtype: list or bool :return: If return_bool, return bool to indicate whether at least one spear exists; else return a list of found ring-spears, each item is a `Spear` object. An empty list is returned if no spears are found, and if first_only is True the list will be at most 1 item long. """ spears = [] marked_atoms = False if not return_bool and not orig_struct.atom[1].property.get( self.ORIG_ATOM_INDEX) == 1: # The user might have marked these atoms already as an optimization self.markAtomsBeforeSpearCheck(orig_struct) marked_atoms = True # Find any atoms that we can outright ignore if is_ring_struct: ignore_atoms = set(self.ring_indexes) else: ignore_atoms = set() if atoms: atset = set(atoms) ignore_atoms.update([ x for x in range(1, orig_struct.atom_total + 1) if x not in atset ]) # Now create a new structure without any ignored atoms or atoms too far # away to spear. if distorted: # We can't eliminate any atoms based on distance if the structure is # distorted. struct = orig_struct.copy() struct.deleteAtoms(ignore_atoms) else: struct = self._roughCutAtoms(orig_struct, cell=cell, dist=dist, ignore=ignore_atoms) if not struct: return [] if not distorted: self.quickWeedAtoms(struct) if not struct.atom_total: return [] pairs = self._findSpearingPairs(struct, return_bool or first_only) if return_bool: return bool(pairs) for pair in pairs: indexes = [x.property[self.ORIG_ATOM_INDEX] for x in pair] spears.append(Spear(indexes, self.ring_indexes)) if marked_atoms: msutils.remove_atom_property(orig_struct, self.ORIG_ATOM_INDEX) return spears
def _roughCutAtoms(self, orig_struct, cell=None, dist=ROUGH_CUT_DISTANCE, ignore=None): """ A quick method for eliminating atoms that are so far from this ring that they cannot possible spear it. :type orig_struct: `schrodinger.structure.Structure` :param orig_struct: The structure to extract atoms from. :type cell: `schrodinger.infra.structure.DistanceCell` :param cell: An infrastructure distance cell that has been created for use in rough-cutting atoms too far away from the ring. If not provided, a cell will be created. Pre-creating the cell for a structure and using it for all rings saves significant time. If the cell needs to be created, it will be created with the PBC currently stored in the pbc property. :type dist: float :param dist: The distance from the ring centroid at which atoms will be rough cut and eliminated from spearing consideration. This is only used if cell is not provided. :type ignore: set :param ignore: A set of atom indexes to ignore even if they fall within the rough cut distance :rtype: `schrodinger.structure.Structure` or None :return: A copy of the input structure with all atoms eliminated that are beyond the rough cut distance or are in the ignore list. """ # Create the distance cell if necessary if cell is None: cell, self.pbc = create_distance_cell(orig_struct, dist, pbc=self.pbc) # Find all atoms that are within the rough cut distance and not ignored try: # query_atoms returns objects that have getIndex() (returns the atom # index in the structure) and getCoords() (returns the PBC-ified # coordinates of that atom that lead to the match). i.e if atom x # has raw coordinates of (6,0,0), pbc=[4,4,4], and dist=3, then # query_atoms(1,0,0) will return a list, and one item in that list # is an object for atom x where object.getIndex()=x and # object.getCoords()=(2,0,0) keep_matches = cell.query_atoms(*self.centroid) except RuntimeError: # This is not a PBC distance cell. Use query instead of queryAtoms # Find all atoms that are within the rough cut distance and not # ignored keep_atoms = cell.query(*self.centroid) if ignore: keep_atoms = set(keep_atoms) - set(ignore) else: # This is a PBC distance cell. Set the atom coordinates to the # nearest coordinates within rough cut distance. if ignore: keep_matches = [ x for x in keep_matches if x.getIndex() not in ignore ] if not keep_matches: return None keep_atoms = [match.getIndex() for match in keep_matches] if not keep_atoms: return None # Using extract for a new structure rather than deleting atoms in the # original structure saves considerable time on large structures new_struct = orig_struct.extract(keep_atoms) return new_struct
[docs] def quickWeedAtoms(self, struct): """ Quickly weed out any atoms that are too far away from the ring centroid to spear the ring. This method differs from the rough cut method, which uses a larger spherical cutoff based on the centroid. This uses a cubic cutoff, which allows the dimensions to be smaller since the sphere has to large enough to maintain sufficient distance near the edges of the ring. This method also uses smaller distances for smaller atoms :type struct: `schrodinger.structure.Structure` :param struct: The structure to modify directly be deleting atoms that are weeded out :rtype: None :return: Nothing is returned, the input structure is modified directly """ # Add a centroid atom so we can use it with getNearestImage, and then # add that atom to the list of atoms to delete at the end of this method centroid = struct.addAtom('C', *self.centroid) outside_cube = [centroid.index] for atom in struct.atom: atomic_number = atom.atomic_number # Bond lengths are set to ensure that slightly elongated bonds to # transition metals are not ignored if atomic_number == 1: threshold = 1.8 elif atomic_number < 10: threshold = 2.5 elif atomic_number < 18: threshold = 2.8 else: threshold = 3.0 if self.pbc: atom_xyz = self.pbc.getNearestImage(struct, centroid, atom) else: atom_xyz = atom.xyz xyz = [a - b for a, b in zip(atom_xyz, self.centroid)] if max(xyz) > threshold or min(xyz) < -threshold: outside_cube.append(atom.index) struct.deleteAtoms(outside_cube)
[docs] @staticmethod def markAtomsBeforeSpearCheck(struct): """ Add an atom property to all atoms with the atom's current index. This allows the original index to be retrieved even after atom indexes have been modified via atom extraction or deletion. :note: This is a static method """ for atom in struct.atom: atom.property[SpearRing.ORIG_ATOM_INDEX] = atom.index
[docs] def bondRingPlaneIntersection(self, atom1, atom2): """ Find the point (if any) where the bond intersects the ring plane Python code for this was originally found at https://stackoverflow.com/questions/5666222/3d-line-plane-intersection and adapted from there. :type atom1: `schrodinger.structure._StructureAtom` :param atom1: The first atom in the bond :type atom2: `schrodinger.structure._StructureAtom` :param atom2: The second atom in the bond :rtype: numpy.array or None :return: The point where the bond intersect the ring plane or None if there is no intersection (bond is parallel to plane of the ring). None is also returned if the line the bond forms intersects the plane but the intersection is outside of the bond line segment itself. """ point1 = numpy.array(atom1.xyz) point2 = numpy.array(atom2.xyz) bond_segment = point2 - point1 dot = self.origin_normal.dot(bond_segment) if abs(dot) > 1.0e-6: point_centroid_vec = point1 - self.centroid # if 'fraction' is between (0 - 1) the point intersects with the # bond segment, with 0 being at point 1, 1 being at point 2 and # values between 0 and 1 indicating the fraction from point 1 to # point 2. Values < 0 or > 1 indicate the line/plane intersection # occurs outside the bond segment fraction = -self.origin_normal.dot(point_centroid_vec) / dot if fraction < 0 or fraction > 1: return None segment = fraction * bond_segment return point1 + segment else: # The segment is parallel to plane return None
def _findSpearingPairs(self, struct, first_only): """ Find atoms that spear this ring. :type struct: schrodinger.structure.Structure :param struct: The structure to search for atoms that spear. Note that atom positions will be modified by this function - as atoms may be moved to a different PBC image. :type first_only: bool :param first_only: Whether to find only the first set of spearing atoms or all sets of spearing atoms :rtype: list :return: Each item of the list is a Spear object for a set of atoms that spear this ring """ pairs = [] centroid_atom = struct.addAtom('C', *self.centroid) for atom in struct.atom: # Move atom to the image that is closest to the centroid if self.pbc: atom.xyz = self.pbc.getNearestImage(struct, centroid_atom, atom) for neighbor in atom.bonded_atoms: if neighbor.index < atom.index: continue if self.pbc: # Move the neighbor atom to the image that is closest to # atom neighbor.xyz = self.pbc.getNearestImage( struct, atom, neighbor) intersection = self.bondRingPlaneIntersection(atom, neighbor) if intersection is None: continue # Check if the intersection is within the ring radius = distance.euclidean(intersection, self.centroid) if radius < self.radius: pairs.append((atom, neighbor)) if first_only: return pairs struct.deleteAtoms([centroid_atom.index]) return pairs
[docs]def check_for_spears(ring_struct, spear_struct=None, atoms=None, rings=None, distorted=False, first_only=True, dist=ROUGH_CUT_DISTANCE, pbc=None, max_ring_size=12, cell=None, spear_rings=None, return_bool=False): """ Check for ring spears - rings that have bonds passing through the face of the ring. :type ring_struct: `schrodinger.structure.Structure` :param ring_struct: The structure containing the rings to check for spearing :type spear_struct: `schrodinger.structure.Structure` :param spear_struct: The structure to check if any bonds spear a ring in ring_struct. If not given, ring_struct will be used - i.e. intrastructure ring spears will be searched for. :type atoms: list :param atoms: List of atom indexes to check to see if they spear a ring. Should be indexes from spear_struct if spear_struct is given, otherwise indexes from ring_struct. If not given, all atoms will be checked. :type rings: iterable of `schrodinger.structure._Ring` :param rings: _Ring objects to check to see if they are speared. If not given, all rings in the ring_struct.ring iterator will be used. Use this parameter if you have an optimized method of finding rings in ring_struct or if they are already known. :type distorted: bool :param distorted: If False (default), the structure will be assumed to have reasonable bond lengths, and optimizations will be used to dramatically speed up spear-finding in large structures by eliminating atoms that cannot possibly spear a ring. If True, atom distances will not be used to optimize spear-finding - significantly increasing the time required. :type first_only: bool :param first_only: Whether to return after finding the first ring spear, or whether all ring spears should be found. :type dist: float :param dist: The distance from the centroid to use when making a first rough cut pass to eliminate atoms that are too far away from a ring to spear it. The default value is the module constant ROUGH_CUT_DISTANCE. This value is ignored if distorted is True. :type pbc: None, infrastructure.PBC, or list :param pbc: If periodic boundary conditions should be used, provide either an infrastructure.PBC object or the parameters to construct one. Allowed PBC constructors:: * a, b, c : box lengths * a, b, c, alpha, beta, gamma box : box lengths and angles * ax, ay, az, bx, by, bz, cx, cy, cz : box vectors :type max_ring_size: int or None :param max_ring_size: The maximum size ring that will be checked for ring spears. The default value prevents macrocycles from being checked. Set to None to include all rings :type cell: `schrodinger.infra.structure.DistanceCell` :param cell: An infrastructure distance cell that has been created for use in rough-cutting atoms too far away from the ring. If not provided, a cell will be created. :type spear_rings: list of `SpearRing` or None :param spear_rings: findSpears for these spear_rings, if not None. If None, create spear_rings based on rings or ring_struct.rings, and then findSpears. :type return_bool: bool :param return_bool: If True, return bool instead of list :rtype: list or bool :return: If return_bool, return bool to indicate whether at least one spear exists; else return a list of found ring-spears, each item is a `Spear` object. An empty list is returned if no spears are found, and if first_only is True, the list will be at most 1 item long. :raises ValueError: If the pbc needs to be constructed and the parameters don't match an available constructor """ # Create a distance cell for the structure with potentially spearing atoms. # Creating the distance cell once saves considerable time. if spear_struct: struct = spear_struct.copy() is_ring_struct = False else: struct = ring_struct.copy() is_ring_struct = True if pbc: pbc = create_pbc(pbc) if not cell: cell, pbc = create_distance_cell(struct, dist, pbc) if spear_rings: if max_ring_size: spear_rings = list( [x for x in spear_rings if x.ring_size <= max_ring_size]) else: spear_rings = [] # Find all the rings in the molecule and precalculate some data if rings is None: rings = ring_struct.ring for ring in rings: if max_ring_size and len(ring) > max_ring_size: continue spear_rings.append( SpearRing(ring_struct, ring.getAtomIndices(), pbc=pbc)) # Find all requested ring spears all_spears = [] for ring in spear_rings: spears = ring.findSpears(struct, atoms=atoms, is_ring_struct=is_ring_struct, distorted=distorted, first_only=first_only, cell=cell, return_bool=return_bool) if spears: if return_bool or first_only: return spears else: all_spears.extend(spears) return False if return_bool else all_spears