Source code for schrodinger.structutils.measure

"""
Functions for measuring distances and angles in structures.

Copyright Schrodinger, LLC. All rights reserved.

"""
# Contributors: Matvey Adzhigirey

import math

import numpy
from scipy.spatial import cKDTree

from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import mmlist
from schrodinger.infra import structure as cppstructure
from schrodinger.structutils import transform
from typing import List, Tuple, Union


[docs]def get_close_atoms(st, dist, atoms=None): """ Use this function to find all atoms within a specified distance of each other in roughly O(N) time. Returns a list of tuples in the form of: (atom1, atom2), where atom1 and atom2 are atom indices. This function is only roughly O(N) in the number of atoms in the molecule because as dist increases it will reach the limit of O(N^2). Its true cost is O(N*m) where m is the number of atoms in a cubic box with edges of dist length. :param st: Structure object :param dist: distance threshold, in angstroms. :param atoms: optionally consider only atoms with these indices (all atoms in CT by are scanned by default) NOTES: - Each atom pair is listed only once in the output. - This function is efficient only for small distances (<3A) - Periodic boundary conditions (PBC) are NOT honored. """ dc = cppstructure.DistanceCell(st, dist) if atoms: limitations = set(atoms) atoms = map(st.atom.__getitem__, atoms) else: limitations = None atoms = st.atom nearby_atom_pairs = [] for a in atoms: index = a.index for nearby_atom in dc.query(*a.xyz): if nearby_atom <= index: continue if limitations and nearby_atom not in limitations: continue nearby_atom_pairs.append((a.index, nearby_atom)) return nearby_atom_pairs
[docs]def get_close_coordinates(coords, dist): """ Use this function to find all coordinates within a specified distance of each other in roughly O(N) time. :param coords: List of (x, y, z) coordinates. :type coords: list(list(float, float, float)) :param dist: distance threshold, in angstroms. :type dist: float :return: Returns indices to the input array for pairs of the coordinates that are within the threshold of each other. :rtype: tuple(int, int) """ coords_array = numpy.array(coords, dtype=numpy.float32) cell_handle = mm.mmct_create_distance_cell_xyz2(coords_array, dist) link_cell_within = [] for i, coord in enumerate(coords): neighbor_mmlist = \ mm.mmct_query_distance_cell(cell_handle, *coord) for idx in range(mm.mmlist_get_size(neighbor_mmlist)): j = mm.mmlist_get(neighbor_mmlist, idx) if j > i: link_cell_within.append((i, j)) mm.mmlist_delete(neighbor_mmlist) mm.mmct_delete_distance_cell(cell_handle) return link_cell_within
[docs]def create_distance_cell(st, distance, honor_pbc=True): """ Create a DistanceCell for the given structure and cutoff. If struct has the Chorus box properties, the distance cell will be PBC-aware. :param st: The input structure, may have the Chorus box properties. :type st: `schrodinger.structure.Structure` :param distance: The cutoff for finding nearest neighbor atoms. :type distance: float :param honor_pbc: Whether to honor Periodic Boundary Conditions, if defined as properties in the "st" structure. Default is True. :type honor_pbc: bool :return: The distance cell. :rtype: `schrodinger.infra.structure.DistanceCell` """ if honor_pbc: pbc = cppstructure.PBC.get_pbc(st) else: pbc = None if pbc: dcell = cppstructure.DistanceCell(st, distance, pbc) else: dcell = cppstructure.DistanceCell(st, distance) return dcell
[docs]def any_atom_in_distance_cell(distance_cell, st): """ Check that if any atom of `st` lies in the `distance_cell`. :param distance_cell: A DistanceCell object :type distance_cell: schrodinger.infra.structure.DistanceCell :param st: A structure object :type st: schrodinger.structure.Structure :return: True if any atom of `st` lies in the `dcell` else False :rtype: bool """ for xyz in st.getXYZ(): match = distance_cell.query_atoms(*xyz) if match: return True return False
[docs]def get_atoms_close_to_structure(st, other_st, cutoff, honor_pbc=True): """ Returns a list of atoms from st that are within the threshold distance of st2. Example: Get a list of receptor atoms close to the ligand: close_atoms = measure.get_atoms_close_to_structure(re_st, lig_st, 3.0) :param st: Structure atoms from wich should be analyzed/returned. :type st: `structure.Structure` :param other_st: Query structure. :type other_st: `structure.Structure` :param cutoff: Distance theshold. :type cutoff: float :param honor_pbc: Honor Periodic Boundary Conditions, if defined as properties in the "st" structure. Default is True. :type honor_pbc: bool """ dcell = create_distance_cell(st, cutoff, honor_pbc) close_atoms = set() for xyz in other_st.getXYZ(): for match in dcell.query_atoms(*xyz): close_atoms.add(match.getIndex()) return sorted(close_atoms)
[docs]def get_atoms_close_to_subset(st, other_atoms, cutoff, honor_pbc=True): """ Returns a list of atoms from that are within the threshold distance of "other_atoms" subset, and are not themselves in that subset. Example: Get a list of receptor atoms close to the ligand: close_atoms = measure.get_atoms_close_to_subset(st, lig_atoms, 3.0) :param st: Structure atoms from wich should be analyzed/returned. :type st: `structure.Structure` :param other_atoms: Query atoms. :type other_atoms: list of int :param cutoff: Distance theshold. :type cutoff: float :param honor_pbc: Honor Periodic Boundary Conditions, if defined as properties in the "st" structure. Default is True. :type honor_pbc: bool """ dcell = create_distance_cell(st, cutoff, honor_pbc) close_atoms = set() for query_anum in other_atoms: xyz = st.atom[query_anum].xyz for match in dcell.query_atoms(*xyz): close_anum = match.getIndex() if close_anum not in other_atoms: close_atoms.add(close_anum) return sorted(close_atoms)
[docs]def get_shortest_distance(st, atoms=None, st2=None, cutoff=numpy.inf): """ Determines the shortest distance and indices of the nearest atoms between two structures or between a groups of atoms in a single structure. NOTE: Periodic boundary conditions (PBC) are NOT honored. :type st: `schrodinger.structure.Structure` :param st: Structure containing group(s) of atoms for nearest distance search. :type atoms: list(int) :param atoms: If specified, the distances between this group of atoms and all other atoms in st are evaluated. Either atoms or st2, but not both, must be specified. :type st2: `schrodinger.structure.Structure` :param st2: Structure of second group of atoms for nearest-distance search. Either st2 or atoms, but not both, must be specified. :type cutoff: float :param cutoff: Cutoff distance in Angstroms for nearest-distance search (by default no cutoff is used). Setting this parameter can speed the calculation by considering only points between sets that are within the cutoff value. None will be returned if no neighbors are found within the specified cutoff. :rtype: tuple of float, int, int or None :return: A tuple containing the nearest distance between atoms and the indices of the closest atoms between each set. """ if st2 is None and atoms is None: raise ValueError("Either atoms or st2 must be specified.") elif st2 is not None and atoms is not None: raise ValueError("Cannot specify both atoms and st2.") st_coords = st.getXYZ() if st2: set1_coords = st_coords set2_coords = st2.getXYZ() else: if len(atoms) == 0: raise ValueError("None of the structure atoms were included in " "atoms list") set2_atoms = [ ai for ai in range(1, st.atom_total + 1) if ai not in atoms ] set1_coords = [st_coords[ai - 1] for ai in atoms] set2_coords = [st_coords[ai - 1] for ai in set2_atoms] if len(set2_coords) == 0: raise ValueError(("All structure atoms were " "included in atoms (no " "second group specified)")) kd_tree = cKDTree(set1_coords) distances, nn_indices = kd_tree.query(set2_coords, distance_upper_bound=cutoff) min_distance = min(distances) if min_distance == numpy.inf: # None of the distances where within the cutoff: return None # Atom from set 2 for the shortest distance match: set2_index = numpy.where(distances == min_distance)[0][0] # Corresponding atom from set 1: set1_index = nn_indices[set2_index] if st2: anum_1 = set1_index + 1 anum_2 = set2_index + 1 else: anum_1 = atoms[set1_index] anum_2 = set2_atoms[set2_index] return (min_distance, anum_1, anum_2)
[docs]def get_atoms_close_to_point(st, xyz, cutoff, honor_pbc=True): """ Returns a list of atoms from st that are within the threshold distance of a point, which is defined by its xyz coordinates.. Example: Get a list of receptor atoms close to a point: close_atoms = measure.get_atoms_close_to_structure(re_st, xyz, 3.0) :param st: Structure atoms from wich should be analyzed/returned. :type st: `structure.Structure` :param xyz: xyz coordinates of point :type xyz: list :param cutoff: Distance theshold. :type cutoff: float :param honor_pbc: Honor Periodic Boundary Conditions, if defined as properties in the "st" structure. Default is True. :type honor_pbc: bool :return: list of atoms close to a point :rtype: list """ dcell = create_distance_cell(st, cutoff, honor_pbc) atoms = [] for match in dcell.query_atoms(*xyz): atoms.append(match.getIndex()) return atoms
[docs]def dist_cell_iterator(st, dist=None, atoms=None, cell_handle=None, delete_dist_cell=True): """ Create an iterator that uses a distance cell to iterate through neighbors of the specified atoms :param st: The structure to examine :type st: `schrodinger.structure.Structure` :param dist: The distance cutoff for calculating neighbors. Either dist or cell_handle must be given, but not both. :type dist: float :param atoms: A list of atom numbers to calculate neighbors for. If not, given, st.atom will be used. :type atoms: list :param cell_handle: A handle to an existing distance cell. If not given, a new distance cell will be created with distance dist. Either dist or cell_handle must be given, but not both. :type cell_handle: int :param delete_dist_cell: If cell_handle is given and delete_dist_cell is True, then distance cell cell_handle will be deleted after iteration is complete. Has no effect if cell_handle is not given. Defaults to True. :type delete_dist_cell: bool :return: An iterator that iterates through neighbors of the specified atoms :rtype: iter :raise ValueError: If both dist and cell_handle are not None :deprecated: The DistanceCellIterator class provides the same functionality as this function but with increased flexibility """ if dist is not None and cell_handle is not None: raise ValueError("Cannot provide a distance and an existing distance " "cell handle to dist_cell_iterator") elif dist is None and cell_handle is None: raise ValueError("Must provide either a distance or an existing " "distance cell handle to dist_cell_iterator") if cell_handle is None: cell_handle = mm.mmct_create_distance_cell(st.handle, dist) delete_dist_cell = True if atoms is None: atoms = range(1, st.atom_total + 1) for atom_num in atoms: atom = st.atom[atom_num] coords = atom.xyz neighbor_mmlist = mm.mmct_query_distance_cell(cell_handle, *coords) neighbor_pylist = mmlist._mmlist_to_pylist(neighbor_mmlist) mm.mmlist_delete(neighbor_mmlist) yield (atom_num, neighbor_pylist) if delete_dist_cell: mm.mmct_delete_distance_cell(cell_handle)
[docs]class DistanceCellIterator(object): """ Iterate through neighbors of specified atoms. This class replaces the dist_cell_iterator function. NOTE: Periodic boundary conditions (PBC) are NOT honored. """
[docs] def __init__(self, struc, dist, atoms=None): """ Construct the distance cell to use for finding neighbors :param struc: The structure to use for building the distance cell :type struc: `schrodinger.structure.Structure` :param dist: The distance cutoff for calculating neighbors :type dist: float :param atoms: A list of atom numbers for `struc`. If given, the distance cell will only contain the specified subset of atoms, so all other atoms will be ignored when calculating neighbors. If not given, all atoms will be used. :type atoms: list """ self._struc = struc if atoms is None: cell_st = struc self._renumber_map = None else: cell_st = struc.extract(atoms) self._renumber_map = atoms # Atoms are 1-indexed. We add a None at index 0 so that indices in # this list match the atom numbers. self._renumber_map.insert(0, None) self._cell_handle = mm.mmct_create_distance_cell(cell_st.handle, dist)
def __del__(self): mm.mmct_delete_distance_cell(self._cell_handle)
[docs] def iterateNeighboringAtoms(self, struc=None, atoms=None): """ Iterate over neighboring atoms (atoms within `dist` Angstrom of each other). NOTE: Periodic boundary conditions (PBC) are NOT honored. :param struc: The query structure. Neighbors will be found for each atom of this structure. If not given, the structure passed to `__init__` will be used as the query structure. :type struc: `schrodinger.structure.Structure` :param atoms: A list of atom numbers for the query structure. If given, only the specified atoms will be examined. If not given, all atoms of the query structure will be used. :type atoms: list(int) :return: A generator that iterates through neighbors. Each iteration will yield a tuple of (atom index, list of neighboring atom indicies) :rtype: generator Note: This method returns atom indices instead of atom object due to speed concerns. Profiling (using timeit) showed that: - returning atom indices instead of atom objects reduced runtime by ~25% - using `coord = mm.mmct_atom_get_xyz(struc.handle, atom_num)` in place of `coord = struc.atom[atom_num].xyz` also reduced runtime by ~25% """ if struc is None: struc = self._struc if atoms is None: atoms = range(1, struc.atom_total + 1) for atom_num in atoms: coord = mm.mmct_atom_get_xyz(struc.handle, atom_num) neighbor_mmlist = mm.mmct_query_distance_cell( self._cell_handle, *coord) neighbor_nums = mmlist._mmlist_to_pylist(neighbor_mmlist) mm.mmlist_delete(neighbor_mmlist) if self._renumber_map is not None: neighbor_nums = [self._renumber_map[i] for i in neighbor_nums] yield (atom_num, neighbor_nums)
[docs]def measure_distance(atom1, atom2): """ Measure the distance between two atoms. All atom arguments must be _StructureAtom objects (returned from the Structure.atom list, and can be from different structures), or XYZ coordinates, as lists or numpy arrays. See also the Structure.measure method. It can use integer atom indices in addition to _StructureAtom objects, but is restricted to measurements within the structure and cannot do plane angle measurements. NOTE: Periodic boundary conditions (PBC) are NOT honored. """ if isinstance(atom1, structure._StructureAtom): return mm.mmct_atom_get_distance_s(atom1._ct.handle, atom1._index, atom2._ct.handle, atom2._index) else: # Coordinates were passed in return numpy.linalg.norm(numpy.asarray(atom1) - numpy.asarray(atom2))
[docs]def measure_bond_angle(atom1, atom2, atom3): """ Measure the atom between 3 specified atoms. All atom arguments must be _StructureAtom objects (returned from the Structure.atom list, and can be from different structures), or XYZ coordinates, as lists or numpy arrays. See also the Structure.measure method. It can use integer atom indices in addition to _StructureAtom objects, but is restricted to measurements within the structure and cannot do plane angle measurements. NOTE: Periodic boundary conditions (PBC) are NOT honored. """ if isinstance(atom1, structure._StructureAtom): return mm.mmct_atom_get_bond_angle_s(atom1._ct.handle, atom1._index, atom2._ct.handle, atom2._index, atom3._ct.handle, atom3._index) else: # Coordinates were passed in atom1 = mm.Cartesian(*atom1) atom2 = mm.Cartesian(*atom2) atom3 = mm.Cartesian(*atom3) angle_radians = mm.Cartesian.angle(atom1, atom2, atom3) return math.degrees(angle_radians)
[docs]def measure_dihedral_angle(atom1, atom2, atom3, atom4): """ Measure the dihedral angle between the specified atoms. All atom arguments must be _StructureAtom objects (returned from the Structure.atom list, and can be from different structures), or XYZ coordinates, as lists or numpy arrays. See also the Structure.measure method. It can use integer atom indices in addition to _StructureAtom objects, but is restricted to measurements within the structure and cannot do plane angle measurements. NOTE: Periodic boundary conditions (PBC) are NOT honored. """ if isinstance(atom1, structure._StructureAtom): return mm.mmct_atom_get_dihedral_angle_s(atom1._ct.handle, atom1._index, atom2._ct.handle, atom2._index, atom3._ct.handle, atom3._index, atom4._ct.handle, atom4._index) else: # Coordinates were passed in atom1 = mm.Cartesian(*atom1) atom2 = mm.Cartesian(*atom2) atom3 = mm.Cartesian(*atom3) atom4 = mm.Cartesian(*atom4) angle_radians = mm.Cartesian.dihedral(atom1, atom2, atom3, atom4) return math.degrees(angle_radians)
[docs]def measure_plane_angle(atom1, atom2, atom3, atom4, atom5, atom6, minangle=False): """ Measure the angle between planes of the provided atoms. All atom arguments must be _StructureAtom objects (returned from the Structure.atom list, and can be from different structures), or XYZ coordinates, as lists or numpy arrays. See also the Structure.measure method. It can use integer atom indices in addition to _StructureAtom objects, but is restricted to measurements within the structure and cannot do plane angle measurements. NOTE: Periodic boundary conditions (PBC) are NOT honored. Parameters minangle (bool) This applies to the planar angle calculation and if True restricts the angle to the range [0, 90] degrees. That is, it treats the order of atoms defining a plane as unimportant, and the directionality of the plane normals is ignored. """ if isinstance(atom1, structure._StructureAtom): atom1 = atom1.xyz atom2 = atom2.xyz atom3 = atom3.xyz atom4 = atom4.xyz atom5 = atom5.xyz atom6 = atom6.xyz atom1 = mm.Cartesian(*atom1) atom2 = mm.Cartesian(*atom2) atom3 = mm.Cartesian(*atom3) atom4 = mm.Cartesian(*atom4) atom5 = mm.Cartesian(*atom5) atom6 = mm.Cartesian(*atom6) normal1 = mm.get_normal_vector_to_three_points(atom1, atom2, atom3) normal2 = mm.get_normal_vector_to_three_points(atom4, atom5, atom6) angle_degrees = mm.get_angle_between_two_vectors(normal1, normal2) if minangle and angle_degrees > 90: angle_degrees = 180 - angle_degrees return angle_degrees
[docs]class LinearError(ValueError): """ A class indicating a plane could not be computed due to all atoms being linear """
[docs]def fit_plane_to_points(coords): """ Fit a plane to a set of xyz coordinates This method comes from http://stackoverflow.com/questions/15959411/fit-points-to-a-plane-algorithms-how-to-iterpret-results It is the SVD method appearing there. :type coords: `numpy.array` :param coords: The coordinates to fit the plane to, such as come from struct.getXYZ() :rtype: `numpy.array` :return: An array of 3 floats that defines a normalized vector that is normal to the best-fit plane :raises LinearError: If there are only 3 coordinates and they are colinear. If there are 4 or more colinear coordinates, one of the infinite number of possible vectors will be returned. :raises ValueError: If there are fewer than 3 sets of coordinates :raises numpy.linalg.LinAlgError: If the SVD does not converge (note - I was unable to find a case where this happened) """ # There are 5 methods coded on that page in Python that are supposed to # give equivalent answers. They do not in most cases. In my hands, only # two of the # methods consistently give similar and reasonable answers - fitPlaneSVD # and fitPlaneLTSQ. They are both very fast and approximately the same # speed. I've implemented the SVD method because that seems most # frequently mentioned for finding the best fit plane. This SVD method # appears numerically stable even if all points are co-planar. It cannot # handle only three points, though, so the normal is computed directly # for that case. if len(coords) < 3: raise ValueError('At least 3 sets of coordinates must be supplied') if len(coords) == 3: # SVD requires at least 4 coordinates narrays = [numpy.array(x) for x in coords] vec1 = narrays[1] - narrays[0] vec2 = narrays[2] - narrays[0] norm = transform.get_normalized_vector(numpy.cross(vec1, vec2)) if not transform.get_vector_magnitude(norm): raise LinearError('All three coordinates are colinear') return norm [rows, cols] = coords.shape # Set up constraint equations of the form AB = 0, # where B is a column vector of the plane coefficients # in the form b(1)*X + b(2)*Y +b(3)*Z + b(4) = 0. pmat = numpy.ones((rows, 1)) ab = numpy.hstack([coords, pmat]) [uval, dval, vval] = numpy.linalg.svd(ab, 0) # Solution is last column of vval. bvec = vval[3, :] nn = numpy.linalg.norm(bvec[0:3]) bvec = bvec / nn return bvec[0:3]
[docs]def fit_plane_to_points_as_list(coords): """ Fit a plane to a set of xyz coordinates This method is used by Maestro because it's easier to access a list from C++ than a numpy array. This method comes from http://stackoverflow.com/questions/15959411/fit-points-to-a-plane-algorithms-how-to-iterpret-results It is the SVD method appearing there. :type coords: `numpy.array` :param coords: The coordinates to fit the plane to, such as come from struct.getXYZ() :rtype: `list` :return: An list of 3 floats that defines a normalized vector that is normal to the best-fit plane :raises LinearError: If there are only 3 coordinates and they are colinear. If there are 4 or more colinear coordinates, one of the infinite number of possible vectors will be returned. :raises ValueError: If there are fewer than 3 sets of coordinates :raises numpy.linalg.LinAlgError: If the SVD does not converge (note - I was unable to find a case where this happened) """ result = fit_plane_to_points(coords) return result.tolist()
[docs]def measure_angle_between_rings( ring1: List[Union[structure._StructureAtom, Tuple[float]]], ring2: List[Union[structure._StructureAtom, Tuple[float]]]) -> float: """ Measure the angle between the 2 rings. All atom arguments must be _StructureAtom objects (returned from the Structure.atom list, and can be from different structures), or XYZ coordinates. NOTE: Periodic boundary conditions (PBC) are NOT honored. """ if len(ring1) < 3 or len(ring2) < 3: raise ValueError("ring1 and ring2 must contain at least 3 atoms each") if isinstance(ring1[0], structure._StructureAtom): ring1_coords = [atom.xyz for atom in ring1] ring2_coords = [atom.xyz for atom in ring2] else: ring1_coords = [list(coord) for coord in ring1] ring2_coords = [list(coord) for coord in ring2] normal1 = mm.Cartesian(fit_plane_to_points(numpy.array(ring1_coords))) normal2 = mm.Cartesian(fit_plane_to_points(numpy.array(ring2_coords))) angle_degrees = mm.get_angle_between_two_vectors(normal1, normal2) return angle_degrees