#
# ProDy: A Python Package for Protein Dynamics Analysis
#
# Copyright (C) 2010-2014 University of Pittsburgh
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
from past.utils import old_div
import numpy
from schrodinger.application.desmond.constants import CT_TYPE
from schrodinger.application.desmond.constants import ORIGINAL_INDEX
from schrodinger.application.desmond.packages import topo
[docs]def importLA():
"""Return one of :mod: `scipy.linalg` or :mod: `numpy.linalg`."""
try:
import scipy.linalg as linalg
except ImportError:
try:
import numpy.linalg as linalg
except:
raise ImportError('scipy.linalg or numpy.linalg is required for '
'NMA and structure alignment calculations')
return linalg
NDIM12 = set([1, 2])
NDIM123 = set([1, 2, 3])
[docs]def checkWeights(weights, natoms, ncsets=None, dtype=float):
"""
Return *weights* if it has correct shape ([ncsets, ]natoms, 1).
after its shape and data type is corrected. otherwise raise an exception.
All items of *weights* must be greater than zero.
"""
try:
ndim, shape, wtype = weights.ndim, weights.shape, weights.dtype
except AttributeError:
raise TypeError('weights must be a numpy.ndarray instance')
if ncsets:
if ndim not in NDIM123:
raise ValueError('weights.dim must be 1, 2, or 3')
if ncsets > 1:
if ndim == 3 and shape[0] != ncsets:
raise ValueError('weights.shape must be (ncsets, natoms[, 1])')
weights = numpy.tile(weights.reshape((1, natoms, 1)),
(ncsets, 1, 1))
elif ndim < 3:
weights = weights.reshape((1, natoms, 1))
else:
if ndim not in NDIM12:
raise ValueError('weights.dim must be 1 or 2')
if shape[0] != natoms:
raise ValueError('weights.shape must be (natoms[, 1])')
if ndim == 1:
weights = weights.reshape((natoms, 1))
if dtype:
if wtype != dtype:
try:
weights = weights.astype(dtype)
except ValueError:
raise ValueError('weights.astype({0}) failed, {0} type '
'could not be assigned'.format(str(dtype)))
if any(weights < 0):
raise ValueError('all weights must be greater or equal to 0')
return weights
[docs]def getRMSD(ref, tar, weights=None):
if weights is None:
divByN = old_div(1.0, ref.shape[0])
if tar.ndim == 2:
return numpy.sqrt(((ref - tar)**2).sum() * divByN)
else:
rmsd = numpy.zeros(len(tar))
for i, t in enumerate(tar):
rmsd[i] = ((ref - t)**2).sum()
return numpy.sqrt(rmsd * divByN)
else:
if tar.ndim == 2:
return numpy.sqrt((((ref - tar)**2) * weights).sum() *
(old_div(1, weights.sum())))
else:
rmsd = numpy.zeros(len(tar))
if weights.ndim == 2:
for i, t in enumerate(tar):
rmsd[i] = (((ref - t)**2) * weights).sum()
return numpy.sqrt(rmsd * (old_div(1, weights.sum())))
else:
for i, t in enumerate(tar):
rmsd[i] = (((ref - t)**2) * weights[i]).sum()
return numpy.sqrt(old_div(rmsd, weights.sum(1).flatten()))
[docs]class Ensemble(object):
"""
A class for analysis of arbitrary conformational ensembles.
Adopted from ProDy project (prody.csb.pitt.edu)
"""
[docs] def __init__(self,
cms_model,
tr,
asl_sel='(protein and atom.ptype " CA ")'):
self._coords = None # refrence coordinates
self._n_atoms = 0
self._n_csets = 0 # number of frames
self._weights = None
self._atoms = None
self._confs = None # numpy array of selected atom pos
self.cms_model = cms_model
self._tr = tr
self.asl_sel = asl_sel
for a in self.cms_model.atom:
a.property[ORIGINAL_INDEX] = a.index
self.solute_aids = cms_model.select_atom(CT_TYPE.VAL.SOLUTE)
self._solute_structure = self.cms_model.extract(self.solute_aids)
self._solute_gids = topo.aids2gids(cms_model, self.solute_aids, False)
self._selected_aids = cms_model.select_atom(self.asl_sel)
self._selected_gids = topo.aids2gids(cms_model, self._selected_aids,
False)
self.setCoords()
self.setSoluteCoords()
self.setConfs()
[docs] def __len__(self):
return self._n_csets
def __str__(self):
return ('<Ensemble: {0} frames; {1} atoms; {2} selected atoms>').format(
len(self), self._n_atoms, len(self._indices))
@property
def _indices(self):
"""
Returns a list of selected atom indices
"""
return [self._solute_gids.index(i) for i in self._selected_gids]
[docs] def setSoluteCoords(self):
"""
Coordinate of the solute
"""
self._solute_coords = self._tr[0].pos(self._solute_gids)
self._n_solute_atoms = self._solute_coords.shape[0]
[docs] def setCoords(self):
"""
Coodinates is a reference structure
"""
self._coords = self._tr[0].pos(self._selected_gids)
self._n_atoms = self._coords.shape[0]
[docs] def getCoords(self):
"""
Return a copy of reference coordinates for selected atoms.
"""
if self._coords is None:
return None
return self._coords
[docs] def setWeight(self, weights):
"""
Set atomic weights.
"""
if self._n_atoms == 0:
raise AttributeError('first set reference coordinates')
self._weights = checkWeights(weights, self._n_atoms, None)
[docs] def getWeights(self):
"""
Return a copy of weights of selected atoms.
"""
if self._weights is None:
return None
if self._indices is None:
return self._weights.copy()
if self._weights.ndim == 2:
return self._weights[self._indices]
else:
return self._weights[:, self._indices]
[docs] def setConfs(self):
coords = []
for fr in self._tr:
pos = fr.pos(self._selected_gids)
coords.append(pos)
self._confs = numpy.array(coords)
self._n_csets = len(coords)
def _getCoordsets(self, indices=None):
"""
Returns a copy of coordinate sets() at given *indices*, which may be an
integer, a list of integers or ``None``. ``None`` returns all
coordinate sets.
"""
if self._confs is None:
return None
if self._indices is None:
if indices is None:
return self._confs.copy()
else:
try:
coords = self._confs[indices]
except IndexError:
pass
if coords.base is None:
return coords
else:
return coords.copy()
else:
selids = list(range(len(self._indices)))
if indices is None:
return self._confs.take(selids, 1)
else:
try:
coords = self.confs[indices, selids]
except IndexError:
pass
if coords.base is None:
return coords
else:
return coords.copy()
raise IndexError('indices must be an integer, a list/array of'
'integers, as a slice or None')
[docs] def superpose(self):
"""
Superpose the ensemble onto the reference coordinates.
"""
if self._coords is None:
raise ValueError('coordinates are not set, use `setCoords`')
if self._confs is None or len(self._confs) == 0:
raise ValueError('conformations are not set, use `setCoordsets`')
self._superpose(trans=True)
def _superpose(self, **kwargs):
"""
Superpose conformations and upate coordinates.
"""
linalg = importLA()
svd = linalg.svd
det = linalg.det
indices = self._indices
weights = self._weights
mobs = self._confs
if indices is None:
idx = False
tar = self._coords
movs = None
else:
idx = True
if self._weights is not None:
weights = weights[indices]
tar = self._solute_coords[indices]
movs = self._confs
if weights is None:
tar_com = tar.mean(0)
tar_org = (tar - tar_com)
mob_org = numpy.zeros(tar_org.shape, dtype=mobs.dtype)
tar_org = tar_org.T
else:
weights_sum = weights.sum()
weights_dot = numpy.dot(weights.T, weights)
tar_com = old_div((tar * weights).sum(axis=0), weights_sum)
tar_org = (tar - tar_com)
mob_org = numpy.zeros(tar_org.shape, dtype=mobs.dtype)
for i, mob in enumerate(mobs):
if idx:
mob = mob[list(range(len(indices)))]
if weights is None:
mob_com = mob.mean(0)
matrix = numpy.dot(tar_org,
numpy.subtract(mob, mob_com, mob_org))
else:
mob_com = old_div((mob * weights).sum(axis=0), weights_sum)
numpy.subtract(mob, mob_com, mob_org)
matrix = old_div(
numpy.dot((tar_org * weights).T, (mob_org * weights)),
weights_dot)
U, s, Vh = svd(matrix)
Id = numpy.array([[1, 0, 0], [0, 1, 0],
[0, 0, numpy.sign(det(matrix))]])
rotation = numpy.dot(Vh.T, numpy.dot(Id, U.T))
if movs is None:
mob[i] = numpy.dot(mob_org, rotation)
numpy.add(mobs[i], tar_com, mobs[i])
else:
numpy.add(numpy.dot(movs[i], rotation),
(tar_com - numpy.dot(mob_com, rotation)), movs[i])
[docs] def iterpose(self, rmsd=0.00001):
"""
Iteratively superpose the ensemble until convergence:
1. Conformations are aligned with the reference coordinates
2. mean coordiantes are calculated
3. mean coordiantes are used as reference coordinates
4. repeat until mean coordinates do not change
At the end of the procedure, the reference coordinates set will be
averate of conformations in the ensemble.
"""
if self._coords is None:
raise AttributeError('coordinates are not set, use `setCoords`')
if self._confs is None or len(self._confs) == 0:
raise AttributeError('No trajectory frames available')
rmsdif = 1
step = 0
weights = self._weights
if weights is not None and weights.ndim == 3:
weightsum = weights.sum(axis=0)
length = len(self)
while rmsdif > rmsd:
self._superpose()
if weights is None:
newxyz = old_div(self._confs.sum(0), length)
else:
newxyz = old_div((self._confs * weights).sum(0), weightsum)
rmsdif = getRMSD(self._coords, newxyz)
self._coords = newxyz
step += 1
[docs] def getDeviations(self):
"""Return deviations from reference coordinates for selected atoms.
Conformations can be aligned using one of :meth: `superpose` or
:meth: `iterpose` methods prior to calculating deviations."""
if not isinstance(self._confs, numpy.ndarray):
print('Conformations are not set.')
return None
if not isinstance(self._coords, numpy.ndarray):
print('Coordinates are not set.')
return None
return self._getCoordsets() - self._coords
@property
def solute_st(self):
"""
return maestro structure of solute atoms.
"""
return self._solute_structure
@property
def selected_st(self):
"""
return maestro structure of selected atoms.
"""
tmp_str = self._solute_structure.copy()
tmp_str.setXYZ(self._solute_coords)
atom_num = [i + 1 for i in self._indices]
selected_atoms = tmp_str.extract(atom_num)
return selected_atoms
[docs] def numAtoms(self):
"""
Return number of atoms.
"""
return self._n_atoms
[docs] def numConfs(self):
"""
Return number of frames.
"""
return self._n_csets
[docs] def numSelected(self):
return self._n_atoms
"""
cmsfn = '1ux7-1_10-out.cms'
trj_directory = '1ux7-1_10_trj'
dsim = create_simulation(cmsfn, trj_directory)
e = Ensemble(dsim)
#e.superpose()
#e.iterpose()
#print e.numSelected()
"""