Source code for schrodinger.trajectory.prody.writenmd

#
# ProDy: A Python Package for Protein Dynamics Analysis
#
# Copyright (C) 2010-2014 University of Pittsburgh
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
"""This module defines input and output functions for NMD format.

.. _nmd-format:

NMD Format
-------------------------------------------------------------------------------

Description
^^^^^^^^^^^

NMD files (extension :file: `.nmd`) are plain text files that contain at
least normal mode and system coordinate data.

NMD files can be visualized using :ref: `nmwiz`.  ProDy functions
:func: `.writeNMD` and :func: `.parseNMD` can be used to read and write NMD
files.

Data fields
^^^^^^^^^^^

Data fields in bold face are required. All data arrays and lists must be in a
single line and items must be separated by one or more space characters.

**coordinates**: system coordinates as a list of decimal numbers
  Coordinate array is the most important line in an NMD file. All mode array
  lengths must match the length of the coordinate array. Also, number of atoms
  in the system is deduced from the length of the coordinate array.

::

  coordinates 27.552 4.354 23.629 24.179 4.807 21.907 ...

**mode**: normal mode array as a list of decimal numbers
  Optionally, mode index and a scaling factor may be provided
  in the same line as a mode array. Both of these must precede the mode array.
  Providing a scaling factor enables relative scaling of the mode arrows and
  the amplitude of the fluctuations in animations. For NMA, scaling factors
  may be chosen to be the square-root of the inverse-eigenvalue associated
  with the mode. Analogously, for PCA data, scaling factor would be the
  square-root of the eigenvalue.

  If a mode line contains numbers preceding the mode array, they are evaluated
  based on their type. If an integer is encountered, it is considered the mode
  index. If a decimal number is encountered, it is considered the scaling
  factor. Scaling factor may be the square-root of the inverse eigenvalue
  if data is from an elastic network model, or the square-root of the
  eigenvalue if data is from an essential dynamics (or principal component)
  analysis.

  For example, all of the following lines are valid. The first line contains
  mode index and scaling factor. Second and third lines contain mode index or
  scaling factor. Last line contains only the mode array.

::

  mode 1 2.37    0.039 0.009 0.058 0.038 -0.011 0.052  ...
  mode 1    0.039 0.009 0.058 0.038 -0.011 0.052  ...
  mode 2.37    0.039 0.009 0.058 0.038 -0.011 0.052  ...
  mode 0.039 0.009 0.058 0.038 -0.011 0.052 0.043  ...

*name*: name of the model

The length of all following data fields must be equal to the number of atoms in
the system. NMWiz uses such data when writing a temporary PDB files for
loading coordinate data into VMD.

*atomnames*: list of atom names
  If not provided, all atom names are set to "CA".

*resnames*: list of residue names
  If not provided, all residue names are set to "GLY".

*chainids*: list of chain identifiers
  If not provided, all chain identifiers are set to "A".

*resids*: list of residue numbers
  If not provided, residue numbers are started from 1 and incremented by one
  for each atom.

*bfactors*: list of experimental beta-factors
  If not provided, all beta-factors are set to zero.
  Beta-factors can be used to color the protein representation.

NMD files may contain additional lines. Only lines that start with one of the
above field names are evaluated by NMWiz.


Autoload Trick
^^^^^^^^^^^^^^

By adding a special line in an NMD file, file content can be automatically
loaded into VMD at startup. The first line calls a NMWiz function to load the
file itself (:file: `xyzeros.nmd`).

::

  nmwiz_load xyzeros.nmd
  coordinates 0 0 0 0 0 0  ...
  mode 0.039 0.009 0.058 0.038 -0.011 0.052 ...
  mode -0.045 -0.096 -0.009 -0.040 -0.076 -0.010 ...
  mode 0.007 -0.044 0.080 0.015 -0.037 0.062 ...


In this case, VMD must be started from the command line by typing
:program: `vmd -e xyzeros.nmd`."""

import gzip
import os
import zipfile
from os.path import abspath
from os.path import isfile
from os.path import join
from os.path import split
from os.path import splitext
from past.utils import old_div

import numpy as np

#from prody.utilities import openFile, addext
import schrodinger.structure as structure

from .mode import Mode
from .mode import Vector
from .modeset import ModeSet
from .nma import NMA

__all__ = ['writeNMD', 'writeNMDmae']

ZERO = 1e-6


[docs]def gzip_open(filename, *args, **kwargs):
    if args and "t" in args[0]:
        args = (args[0].replace("t", ""),) + args[1:]
    if isinstance(filename, str):
        return gzip.open(filename, *args, **kwargs)
    else:
        return gzip.GzipFile(filename, *args, **kwargs)


OPEN = {
    '.gz': gzip_open,
    '.zip': zipfile.ZipFile,
}


[docs]def openFile(filename, *args, **kwargs):
    """
    Open `filename` for reading, writing, or appending. First argument in
    `args` is treated as the mode. Opening :file: `.gz` and :file: `.zip` files
    for reading and writing is handled automatically.

    :arg backup: backup existing file using :func: `.backupFile` when opening
        in append or write modes, default is obtained from package settings
    :type backup: bool

    :arg backup_ext: extension for backup file, default is :file: `.BAK`
    :type backup_ext: str
    """

    try:
        exists = os.path.isfile(filename)
    except Exception as err:
        raise TypeError('filename must be a string ({0})'.format(str(err)))

    folder = kwargs.pop('folder', None)
    if folder:
        filename = join(folder, filename)

    backup = kwargs.pop('backup', None)
    if backup is not None and backup and args and args[0][0] in ('a', 'w'):
        backupFile(filename,
                   backup=backup,
                   backup_ext=kwargs.pop('backup_ext', None))

    ext = splitext(filename)[1]
    return OPEN.get(ext.lower(), open)(filename, *args, **kwargs)


[docs]def backupFile(filename, backup=None, backup_ext='.BAK', **kwargs):
    """
    Rename `filename` with `backup_ext` appended to its name for backup
    purposes, if `backup` is `True` or if automatic backups is turned on using
    :func: `.confProDy`. Default extension :file: `.BAK` is used when one is
    not set using :func: `.confProDy`. If `filename` does not exist, no action
    will be taken and `filename` will be returned. If file is successfully
    renamed, new filename will be returned.
    """

    try:
        exists = isfile(filename)
    except Exception as err:
        raise TypeError('filename must be a string ({0})'.format(str(err)))

    if exists and backup:
        bak = filename + backup_ext
        if isfile(bak):
            try:
                os.remove(bak)
            except Exception as err:
                pass
        try:
            os.rename(filename, bak)
        except Exception as err:
            pass
        return bak
    else:
        return filename


[docs]def addext(filename, extension):
    """Return *filename*, with *extension* if it does not have one."""

    return filename + ('' if splitext(filename)[1] else extension)


[docs]def orig_idx_to_cur_idx(st):
    mapping = {}
    for atom in st.atom:
        mapping[atom.property['i_m_original_index']] = atom.index
    return mapping


[docs]def setup_atom(mode_number, atom, vector):
    coordinates = ['x', 'y', 'z']
    for c, v in zip(coordinates, vector):
        prop = 'r_m_eig_vector_%i_%s' % (mode_number, c)
        atom.property[prop] = v


[docs]def writeNMDmae(filename, modes, solute_st, selected_st):
    """
    return *filename* of a maestro structure file which includes modes
    information.
    """
    print("in writeNMDmae")
    solute_map = orig_idx_to_cur_idx(solute_st)
    selected_map = orig_idx_to_cur_idx(selected_st)
    st = solute_st.copy()
    for mode in modes:
        mode_idx = mode.getIndex() + 1
        var = mode.getVariance()
        trace = mode.getModel().getTrace()
        fractVariance = old_div(var, trace)
        fractVariance_str = 'r_m_fraction_of_variance_mode_%i' % mode_idx
        st.property[fractVariance_str] = fractVariance
        vectors = mode._getArray()
        for aid, atom in enumerate(selected_st.atom):
            original_idx = atom.property['i_m_original_index']
            new_idx = solute_map[original_idx]
            new_atom = st.atom[new_idx]
            offset = aid * 3
            setup_atom(mode_idx, new_atom, vectors[offset:offset + 3])

    st.write(filename + '.mae')


[docs]def writeNMD(filename, modes, struct):
    """
    Return `filename` that contains `modes` and `structure` data in NMD format
    described in :ref: `nmd-format`. :file: `.nmd` extension is appended to
    filename, if it does not have an extension.

    .. note::
       #. This function skips modes with zero eigenvalues.
       #. If a :class: `.Vector` instance is given, it will be normalized
          before it is written. It's length before normalization will be
          written as the scaling factor of the vector.
    """

    if isinstance(struct, (structure.Structure)):
        struct.write(filename + '.mae')

    if not isinstance(modes, (NMA, ModeSet, Mode, Vector)):
        raise TypeError('modes must be NMA, ModeSet, Mode, or Vector, '
                        'not {0}'.format(type(modes)))
    if modes.numAtoms() != struct.atom_total:
        raise Exception('number of atoms do not match')
    out = openFile(addext(filename, '.nmd'), 'w')

    out.write('nmwiz_load {0}\n'.format(abspath(filename)))
    name = modes.getTitle()
    name = name.replace(' ', '_').replace('.', '_')
    if not name.replace('_', '').isalnum() or len(name) > 30:
        name = name.replace(' ', '_').replace('.', '_')
    if not name.replace('_', '').isalnum() or len(name) > 30:
        name = splitext(split(filename)[1])[0]
    out.write('name {0}\n'.format(name))
    try:
        coords = struct.getXYZ()
    except:
        raise ValueError('coordinates could not be retrieved from atoms')
    if coords is None:
        raise ValueError('atom coordinates are not set')
    try:
        data = [a.pdbname.strip() for a in struct.atom]
        if data is not None:
            out.write('atomnames {0}\n'.format(' '.join(data)))
    except:
        pass
    try:
        data = [a.pdbres.strip() for a in struct.atom]
        if data is not None:
            out.write('resnames {0}\n'.format(' '.join(data)))
    except:
        pass

    try:
        data = np.array([a.resnum for a in struct.atom])
        if data is not None:
            out.write('resids ')
            data.tofile(out, ' ')
            out.write('\n')
    except:
        pass
    try:
        data = [a.chain.strip() for a in struct.atom]
        if data is not None:
            out.write('chainids {0}\n'.format(' '.join(data)))
    except:
        pass
    try:
        data = ["PROT"] * struct.atom_total
        if data is not None:
            out.write('segnames {0}\n'.format(' '.join(data)))
    except:
        pass

    try:
        data = np.array([float(a.property['r_m_pdb_tfactor']) \
                         for a in struct.atom])
        if data is not None:
            out.write('bfactors ')
            data.tofile(out, ' ', '%.2f')
            out.write('\n')
    except:
        pass

    format = '{0:.3f}'.format
    out.write('coordinates ')
    coords.tofile(out, ' ', '%.3f')
    out.write('\n')
    count = 0
    if isinstance(modes, Vector):
        out.write('mode 1 {0:.2f} '.format(abs(modes)))
        modes.getNormed()._getArray().tofile(out, ' ', '%.3f')
        out.write('\n')
        count += 1
    else:
        if isinstance(modes, Mode):
            modes = [modes]
        for mode in modes:
            if mode.getEigval() < ZERO:
                continue
            out.write('mode {0} {1:.2f} '.format(mode.getIndex() + 1,
                                                 mode.getVariance()**0.5))
            arr = mode._getArray().tofile(out, ' ', '%.3f')
            out.write('\n')
            count += 1
    if count == 0:
        print(
            'No normal mode data was written. Given modes might have 0 eigenvalues.'
        )
    return filename