Source code for schrodinger.trajectory.prody.ensemble

#
# ProDy: A Python Package for Protein Dynamics Analysis
#
# Copyright (C) 2010-2014 University of Pittsburgh
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#

from past.utils import old_div

import numpy

from schrodinger.application.desmond.constants import CT_TYPE
from schrodinger.application.desmond.constants import ORIGINAL_INDEX
from schrodinger.application.desmond.packages import topo


[docs]def importLA(): """Return one of :mod: `scipy.linalg` or :mod: `numpy.linalg`.""" try: import scipy.linalg as linalg except ImportError: try: import numpy.linalg as linalg except: raise ImportError('scipy.linalg or numpy.linalg is required for ' 'NMA and structure alignment calculations') return linalg
NDIM12 = set([1, 2]) NDIM123 = set([1, 2, 3])
[docs]def checkWeights(weights, natoms, ncsets=None, dtype=float): """ Return *weights* if it has correct shape ([ncsets, ]natoms, 1). after its shape and data type is corrected. otherwise raise an exception. All items of *weights* must be greater than zero. """ try: ndim, shape, wtype = weights.ndim, weights.shape, weights.dtype except AttributeError: raise TypeError('weights must be a numpy.ndarray instance') if ncsets: if ndim not in NDIM123: raise ValueError('weights.dim must be 1, 2, or 3') if ncsets > 1: if ndim == 3 and shape[0] != ncsets: raise ValueError('weights.shape must be (ncsets, natoms[, 1])') weights = numpy.tile(weights.reshape((1, natoms, 1)), (ncsets, 1, 1)) elif ndim < 3: weights = weights.reshape((1, natoms, 1)) else: if ndim not in NDIM12: raise ValueError('weights.dim must be 1 or 2') if shape[0] != natoms: raise ValueError('weights.shape must be (natoms[, 1])') if ndim == 1: weights = weights.reshape((natoms, 1)) if dtype: if wtype != dtype: try: weights = weights.astype(dtype) except ValueError: raise ValueError('weights.astype({0}) failed, {0} type ' 'could not be assigned'.format(str(dtype))) if any(weights < 0): raise ValueError('all weights must be greater or equal to 0') return weights
[docs]def getRMSD(ref, tar, weights=None): if weights is None: divByN = old_div(1.0, ref.shape[0]) if tar.ndim == 2: return numpy.sqrt(((ref - tar)**2).sum() * divByN) else: rmsd = numpy.zeros(len(tar)) for i, t in enumerate(tar): rmsd[i] = ((ref - t)**2).sum() return numpy.sqrt(rmsd * divByN) else: if tar.ndim == 2: return numpy.sqrt((((ref - tar)**2) * weights).sum() * (old_div(1, weights.sum()))) else: rmsd = numpy.zeros(len(tar)) if weights.ndim == 2: for i, t in enumerate(tar): rmsd[i] = (((ref - t)**2) * weights).sum() return numpy.sqrt(rmsd * (old_div(1, weights.sum()))) else: for i, t in enumerate(tar): rmsd[i] = (((ref - t)**2) * weights[i]).sum() return numpy.sqrt(old_div(rmsd, weights.sum(1).flatten()))
[docs]class Ensemble(object): """ A class for analysis of arbitrary conformational ensembles. Adopted from ProDy project (prody.csb.pitt.edu) """
[docs] def __init__(self, cms_model, tr, asl_sel='(protein and atom.ptype " CA ")'): self._coords = None # refrence coordinates self._n_atoms = 0 self._n_csets = 0 # number of frames self._weights = None self._atoms = None self._confs = None # numpy array of selected atom pos self.cms_model = cms_model self._tr = tr self.asl_sel = asl_sel for a in self.cms_model.atom: a.property[ORIGINAL_INDEX] = a.index self.solute_aids = cms_model.select_atom(CT_TYPE.VAL.SOLUTE) self._solute_structure = self.cms_model.extract(self.solute_aids) self._solute_gids = topo.aids2gids(cms_model, self.solute_aids, False) self._selected_aids = cms_model.select_atom(self.asl_sel) self._selected_gids = topo.aids2gids(cms_model, self._selected_aids, False) self.setCoords() self.setSoluteCoords() self.setConfs()
[docs] def __len__(self): return self._n_csets
def __str__(self): return ('<Ensemble: {0} frames; {1} atoms; {2} selected atoms>').format( len(self), self._n_atoms, len(self._indices)) @property def _indices(self): """ Returns a list of selected atom indices """ return [self._solute_gids.index(i) for i in self._selected_gids]
[docs] def setSoluteCoords(self): """ Coordinate of the solute """ self._solute_coords = self._tr[0].pos(self._solute_gids) self._n_solute_atoms = self._solute_coords.shape[0]
[docs] def setCoords(self): """ Coodinates is a reference structure """ self._coords = self._tr[0].pos(self._selected_gids) self._n_atoms = self._coords.shape[0]
[docs] def getCoords(self): """ Return a copy of reference coordinates for selected atoms. """ if self._coords is None: return None return self._coords
[docs] def setWeight(self, weights): """ Set atomic weights. """ if self._n_atoms == 0: raise AttributeError('first set reference coordinates') self._weights = checkWeights(weights, self._n_atoms, None)
[docs] def getWeights(self): """ Return a copy of weights of selected atoms. """ if self._weights is None: return None if self._indices is None: return self._weights.copy() if self._weights.ndim == 2: return self._weights[self._indices] else: return self._weights[:, self._indices]
[docs] def setConfs(self): coords = [] for fr in self._tr: pos = fr.pos(self._selected_gids) coords.append(pos) self._confs = numpy.array(coords) self._n_csets = len(coords)
def _getCoordsets(self, indices=None): """ Returns a copy of coordinate sets() at given *indices*, which may be an integer, a list of integers or ``None``. ``None`` returns all coordinate sets. """ if self._confs is None: return None if self._indices is None: if indices is None: return self._confs.copy() else: try: coords = self._confs[indices] except IndexError: pass if coords.base is None: return coords else: return coords.copy() else: selids = list(range(len(self._indices))) if indices is None: return self._confs.take(selids, 1) else: try: coords = self.confs[indices, selids] except IndexError: pass if coords.base is None: return coords else: return coords.copy() raise IndexError('indices must be an integer, a list/array of' 'integers, as a slice or None')
[docs] def superpose(self): """ Superpose the ensemble onto the reference coordinates. """ if self._coords is None: raise ValueError('coordinates are not set, use `setCoords`') if self._confs is None or len(self._confs) == 0: raise ValueError('conformations are not set, use `setCoordsets`') self._superpose(trans=True)
def _superpose(self, **kwargs): """ Superpose conformations and upate coordinates. """ linalg = importLA() svd = linalg.svd det = linalg.det indices = self._indices weights = self._weights mobs = self._confs if indices is None: idx = False tar = self._coords movs = None else: idx = True if self._weights is not None: weights = weights[indices] tar = self._solute_coords[indices] movs = self._confs if weights is None: tar_com = tar.mean(0) tar_org = (tar - tar_com) mob_org = numpy.zeros(tar_org.shape, dtype=mobs.dtype) tar_org = tar_org.T else: weights_sum = weights.sum() weights_dot = numpy.dot(weights.T, weights) tar_com = old_div((tar * weights).sum(axis=0), weights_sum) tar_org = (tar - tar_com) mob_org = numpy.zeros(tar_org.shape, dtype=mobs.dtype) for i, mob in enumerate(mobs): if idx: mob = mob[list(range(len(indices)))] if weights is None: mob_com = mob.mean(0) matrix = numpy.dot(tar_org, numpy.subtract(mob, mob_com, mob_org)) else: mob_com = old_div((mob * weights).sum(axis=0), weights_sum) numpy.subtract(mob, mob_com, mob_org) matrix = old_div( numpy.dot((tar_org * weights).T, (mob_org * weights)), weights_dot) U, s, Vh = svd(matrix) Id = numpy.array([[1, 0, 0], [0, 1, 0], [0, 0, numpy.sign(det(matrix))]]) rotation = numpy.dot(Vh.T, numpy.dot(Id, U.T)) if movs is None: mob[i] = numpy.dot(mob_org, rotation) numpy.add(mobs[i], tar_com, mobs[i]) else: numpy.add(numpy.dot(movs[i], rotation), (tar_com - numpy.dot(mob_com, rotation)), movs[i])
[docs] def iterpose(self, rmsd=0.00001): """ Iteratively superpose the ensemble until convergence: 1. Conformations are aligned with the reference coordinates 2. mean coordiantes are calculated 3. mean coordiantes are used as reference coordinates 4. repeat until mean coordinates do not change At the end of the procedure, the reference coordinates set will be averate of conformations in the ensemble. """ if self._coords is None: raise AttributeError('coordinates are not set, use `setCoords`') if self._confs is None or len(self._confs) == 0: raise AttributeError('No trajectory frames available') rmsdif = 1 step = 0 weights = self._weights if weights is not None and weights.ndim == 3: weightsum = weights.sum(axis=0) length = len(self) while rmsdif > rmsd: self._superpose() if weights is None: newxyz = old_div(self._confs.sum(0), length) else: newxyz = old_div((self._confs * weights).sum(0), weightsum) rmsdif = getRMSD(self._coords, newxyz) self._coords = newxyz step += 1
[docs] def getDeviations(self): """Return deviations from reference coordinates for selected atoms. Conformations can be aligned using one of :meth: `superpose` or :meth: `iterpose` methods prior to calculating deviations.""" if not isinstance(self._confs, numpy.ndarray): print('Conformations are not set.') return None if not isinstance(self._coords, numpy.ndarray): print('Coordinates are not set.') return None return self._getCoordsets() - self._coords
@property def solute_st(self): """ return maestro structure of solute atoms. """ return self._solute_structure @property def selected_st(self): """ return maestro structure of selected atoms. """ tmp_str = self._solute_structure.copy() tmp_str.setXYZ(self._solute_coords) atom_num = [i + 1 for i in self._indices] selected_atoms = tmp_str.extract(atom_num) return selected_atoms
[docs] def numAtoms(self): """ Return number of atoms. """ return self._n_atoms
[docs] def numConfs(self): """ Return number of frames. """ return self._n_csets
[docs] def numSelected(self): return self._n_atoms
""" cmsfn = '1ux7-1_10-out.cms' trj_directory = '1ux7-1_10_trj' dsim = create_simulation(cmsfn, trj_directory) e = Ensemble(dsim) #e.superpose() #e.iterpose() #print e.numSelected() """