Source code for schrodinger.trajectory.prody.ensemble

#
# ProDy: A Python Package for Protein Dynamics Analysis
#
# Copyright (C) 2010-2014 University of Pittsburgh
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#

from past.utils import old_div

import numpy

from schrodinger.application.desmond.constants import CT_TYPE
from schrodinger.application.desmond.constants import ORIGINAL_INDEX
from schrodinger.application.desmond.packages import topo


[docs]def importLA():
    """Return one of :mod: `scipy.linalg` or :mod: `numpy.linalg`."""

    try:
        import scipy.linalg as linalg
    except ImportError:
        try:
            import numpy.linalg as linalg
        except:
            raise ImportError('scipy.linalg or numpy.linalg is required for '
                              'NMA and structure alignment calculations')
    return linalg


NDIM12 = set([1, 2])
NDIM123 = set([1, 2, 3])


[docs]def checkWeights(weights, natoms, ncsets=None, dtype=float):
    """
    Return *weights* if it has correct shape ([ncsets, ]natoms, 1).
    after its shape and data type is corrected. otherwise raise an exception.
    All items of *weights* must be greater than zero.
    """

    try:
        ndim, shape, wtype = weights.ndim, weights.shape, weights.dtype
    except AttributeError:
        raise TypeError('weights must be a numpy.ndarray instance')

    if ncsets:
        if ndim not in NDIM123:
            raise ValueError('weights.dim must be 1, 2, or 3')
        if ncsets > 1:
            if ndim == 3 and shape[0] != ncsets:
                raise ValueError('weights.shape must be (ncsets, natoms[, 1])')
            weights = numpy.tile(weights.reshape((1, natoms, 1)),
                                 (ncsets, 1, 1))
        elif ndim < 3:
            weights = weights.reshape((1, natoms, 1))
    else:
        if ndim not in NDIM12:
            raise ValueError('weights.dim must be 1 or 2')
        if shape[0] != natoms:
            raise ValueError('weights.shape must be (natoms[, 1])')
        if ndim == 1:
            weights = weights.reshape((natoms, 1))

    if dtype:
        if wtype != dtype:
            try:
                weights = weights.astype(dtype)
            except ValueError:
                raise ValueError('weights.astype({0}) failed, {0} type '
                                 'could not be assigned'.format(str(dtype)))
    if any(weights < 0):
        raise ValueError('all weights must be greater or equal to 0')

    return weights


[docs]def getRMSD(ref, tar, weights=None):
    if weights is None:
        divByN = old_div(1.0, ref.shape[0])
        if tar.ndim == 2:
            return numpy.sqrt(((ref - tar)**2).sum() * divByN)
        else:
            rmsd = numpy.zeros(len(tar))
            for i, t in enumerate(tar):
                rmsd[i] = ((ref - t)**2).sum()
            return numpy.sqrt(rmsd * divByN)
    else:
        if tar.ndim == 2:
            return numpy.sqrt((((ref - tar)**2) * weights).sum() *
                              (old_div(1, weights.sum())))
        else:
            rmsd = numpy.zeros(len(tar))
            if weights.ndim == 2:
                for i, t in enumerate(tar):
                    rmsd[i] = (((ref - t)**2) * weights).sum()
                return numpy.sqrt(rmsd * (old_div(1, weights.sum())))
            else:
                for i, t in enumerate(tar):
                    rmsd[i] = (((ref - t)**2) * weights[i]).sum()
                return numpy.sqrt(old_div(rmsd, weights.sum(1).flatten()))


[docs]class Ensemble(object):
    """
    A class for analysis of arbitrary conformational ensembles.
    Adopted from ProDy project (prody.csb.pitt.edu)
    """

[docs]    def __init__(self,
                 cms_model,
                 tr,
                 asl_sel='(protein and atom.ptype " CA ")'):

        self._coords = None  # refrence coordinates
        self._n_atoms = 0
        self._n_csets = 0  # number of frames
        self._weights = None
        self._atoms = None
        self._confs = None  # numpy array of selected atom pos
        self.cms_model = cms_model
        self._tr = tr
        self.asl_sel = asl_sel
        for a in self.cms_model.atom:
            a.property[ORIGINAL_INDEX] = a.index

        self.solute_aids = cms_model.select_atom(CT_TYPE.VAL.SOLUTE)
        self._solute_structure = self.cms_model.extract(self.solute_aids)
        self._solute_gids = topo.aids2gids(cms_model, self.solute_aids, False)

        self._selected_aids = cms_model.select_atom(self.asl_sel)
        self._selected_gids = topo.aids2gids(cms_model, self._selected_aids,
                                             False)

        self.setCoords()
        self.setSoluteCoords()
        self.setConfs()

[docs]    def __len__(self):
        return self._n_csets

    def __str__(self):
        return ('<Ensemble: {0} frames; {1} atoms; {2} selected atoms>').format(
            len(self), self._n_atoms, len(self._indices))

    @property
    def _indices(self):
        """
        Returns a list of selected atom indices
        """
        return [self._solute_gids.index(i) for i in self._selected_gids]

[docs]    def setSoluteCoords(self):
        """
        Coordinate of the solute
        """
        self._solute_coords = self._tr[0].pos(self._solute_gids)
        self._n_solute_atoms = self._solute_coords.shape[0]

[docs]    def setCoords(self):
        """
        Coodinates is a reference structure
        """
        self._coords = self._tr[0].pos(self._selected_gids)
        self._n_atoms = self._coords.shape[0]

[docs]    def getCoords(self):
        """
        Return a copy of reference coordinates for selected atoms.
        """
        if self._coords is None:
            return None
        return self._coords

[docs]    def setWeight(self, weights):
        """
        Set atomic weights.
        """
        if self._n_atoms == 0:
            raise AttributeError('first set reference coordinates')
        self._weights = checkWeights(weights, self._n_atoms, None)

[docs]    def getWeights(self):
        """
        Return a copy of weights of selected atoms.
        """
        if self._weights is None:
            return None
        if self._indices is None:
            return self._weights.copy()
        if self._weights.ndim == 2:
            return self._weights[self._indices]
        else:
            return self._weights[:, self._indices]

[docs]    def setConfs(self):
        coords = []
        for fr in self._tr:
            pos = fr.pos(self._selected_gids)
            coords.append(pos)
        self._confs = numpy.array(coords)
        self._n_csets = len(coords)

    def _getCoordsets(self, indices=None):
        """
        Returns a copy of coordinate sets() at given *indices*, which may be an
        integer, a list of integers or ``None``.  ``None`` returns all
        coordinate sets.
        """
        if self._confs is None:
            return None
        if self._indices is None:
            if indices is None:
                return self._confs.copy()
            else:
                try:
                    coords = self._confs[indices]
                except IndexError:
                    pass
                if coords.base is None:
                    return coords
                else:
                    return coords.copy()
        else:
            selids = list(range(len(self._indices)))
            if indices is None:
                return self._confs.take(selids, 1)
            else:
                try:
                    coords = self.confs[indices, selids]
                except IndexError:
                    pass
                if coords.base is None:
                    return coords
                else:
                    return coords.copy()
        raise IndexError('indices must be an integer, a list/array of'
                         'integers, as a slice or None')

[docs]    def superpose(self):
        """
        Superpose the ensemble onto the reference coordinates.
        """
        if self._coords is None:
            raise ValueError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise ValueError('conformations are not set, use `setCoordsets`')
        self._superpose(trans=True)

    def _superpose(self, **kwargs):
        """
        Superpose conformations and upate coordinates.
        """
        linalg = importLA()
        svd = linalg.svd
        det = linalg.det

        indices = self._indices
        weights = self._weights
        mobs = self._confs

        if indices is None:
            idx = False
            tar = self._coords
            movs = None
        else:
            idx = True
            if self._weights is not None:
                weights = weights[indices]
            tar = self._solute_coords[indices]
            movs = self._confs

        if weights is None:
            tar_com = tar.mean(0)
            tar_org = (tar - tar_com)
            mob_org = numpy.zeros(tar_org.shape, dtype=mobs.dtype)
            tar_org = tar_org.T
        else:
            weights_sum = weights.sum()
            weights_dot = numpy.dot(weights.T, weights)
            tar_com = old_div((tar * weights).sum(axis=0), weights_sum)
            tar_org = (tar - tar_com)
            mob_org = numpy.zeros(tar_org.shape, dtype=mobs.dtype)

        for i, mob in enumerate(mobs):
            if idx:
                mob = mob[list(range(len(indices)))]
            if weights is None:
                mob_com = mob.mean(0)
                matrix = numpy.dot(tar_org,
                                   numpy.subtract(mob, mob_com, mob_org))
            else:
                mob_com = old_div((mob * weights).sum(axis=0), weights_sum)
                numpy.subtract(mob, mob_com, mob_org)
                matrix = old_div(
                    numpy.dot((tar_org * weights).T, (mob_org * weights)),
                    weights_dot)
            U, s, Vh = svd(matrix)
            Id = numpy.array([[1, 0, 0], [0, 1, 0],
                              [0, 0, numpy.sign(det(matrix))]])
            rotation = numpy.dot(Vh.T, numpy.dot(Id, U.T))

            if movs is None:
                mob[i] = numpy.dot(mob_org, rotation)
                numpy.add(mobs[i], tar_com, mobs[i])
            else:
                numpy.add(numpy.dot(movs[i], rotation),
                          (tar_com - numpy.dot(mob_com, rotation)), movs[i])

[docs]    def iterpose(self, rmsd=0.00001):
        """
        Iteratively superpose the ensemble until convergence:

        1. Conformations are aligned with the reference coordinates
        2. mean coordiantes are calculated
        3. mean coordiantes are used as reference coordinates
        4. repeat until mean coordinates do not change

        At the end of the procedure, the reference coordinates set will be
        averate of conformations in the ensemble.
        """
        if self._coords is None:
            raise AttributeError('coordinates are not set, use `setCoords`')
        if self._confs is None or len(self._confs) == 0:
            raise AttributeError('No trajectory frames available')
        rmsdif = 1
        step = 0
        weights = self._weights
        if weights is not None and weights.ndim == 3:
            weightsum = weights.sum(axis=0)
        length = len(self)
        while rmsdif > rmsd:
            self._superpose()
            if weights is None:
                newxyz = old_div(self._confs.sum(0), length)
            else:
                newxyz = old_div((self._confs * weights).sum(0), weightsum)
            rmsdif = getRMSD(self._coords, newxyz)
            self._coords = newxyz
            step += 1

[docs]    def getDeviations(self):
        """Return deviations from reference coordinates for selected atoms.
        Conformations can be aligned using one of :meth: `superpose` or
        :meth: `iterpose` methods prior to calculating deviations."""

        if not isinstance(self._confs, numpy.ndarray):
            print('Conformations are not set.')
            return None
        if not isinstance(self._coords, numpy.ndarray):
            print('Coordinates are not set.')
            return None
        return self._getCoordsets() - self._coords

    @property
    def solute_st(self):
        """
        return maestro structure of solute atoms.
        """
        return self._solute_structure

    @property
    def selected_st(self):
        """
        return maestro structure of selected atoms.
        """
        tmp_str = self._solute_structure.copy()
        tmp_str.setXYZ(self._solute_coords)
        atom_num = [i + 1 for i in self._indices]
        selected_atoms = tmp_str.extract(atom_num)
        return selected_atoms

[docs]    def numAtoms(self):
        """
        Return number of atoms.
        """
        return self._n_atoms

[docs]    def numConfs(self):
        """
        Return number of frames.
        """
        return self._n_csets

[docs]    def numSelected(self):
        return self._n_atoms


"""
cmsfn = '1ux7-1_10-out.cms'
trj_directory = '1ux7-1_10_trj'

dsim = create_simulation(cmsfn, trj_directory)
e = Ensemble(dsim)
#e.superpose()
#e.iterpose()
#print e.numSelected()
"""