Source code for gromacs.fileformats.xpm

# GromacsWrapper: xpm.py
# Copyright (c) 2012 Oliver Beckstein <orbeckst@gmail.com>
# Copyright (c) 2010 Tsjerk Wassenaar <tsjerkw@gmail.com>
# Released under the GNU Public License 3 (or higher, your choice)
# See the file COPYING for details.
"""
Gromacs XPM file format
=======================

Gromacs stores matrix data in the xpm file format. This implementation
of a Python reader is based on Tsjerk Wassenaar's post to gmx-users
`numerical matrix from xpm file`_ (Mon Oct 4 13:05:26 CEST 2010). This
version returns a NumPy array and can guess an appropriate dtype for
the array.

.. _numerical matrix from xpm file:
   http://lists.gromacs.org/pipermail/gmx-users/2010-October/054557.html

Classes
-------

.. autoclass:: XPM
   :members:

   .. attribute:: xvalues

      Values of on the x-axis, extracted from the xpm file.

   .. attribute:: yvalues

      Values of on the y-axis, extracted from the xpm file. These are
      in the same order as the rows in the xpm matrix. If *reverse* =
      ``False`` then this is typically a *descending* list of numbers
      (highest to lowest residue number, index number, etc). For
      *reverse* = ``True`` it is resorted accordingly.



Example: Analysing H-bonds
--------------------------

Run :func:`gromacs.g_hbond` to produce the existence map (and the log
file for the atoms involved in the bonds; the ndx file is also
useful)::

  gromacs.g_hbond(s=TPR, f=XTC, g="hbond.log", hbm="hb.xpm", hbn="hb.ndx")

Load the XPM::

  hb = XPM("hb.xpm", reverse=True)

Calculate the fraction of time that each H-bond existed::

  hb_fraction = hb.array.mean(axis=0)

Get the descriptions of the bonds::

  desc = [line.strip() for line in open("hbond.log") if not line.startswith('#')]

.. Note::

   It is important that ``reverse=True`` is set so that the rows in
   the xpm matrix are brought in the same order as the H-bond
   labels.

Show the results::

  print "\\n".join(["%-40s %4.1f%%" % p for p in zip(desc, 100*hb_fraction)])

"""

import os, errno
import re
import warnings

import numpy

from ..exceptions import ParseError, AutoCorrectionWarning
from .. import utilities
from .convert import Autoconverter

import logging


[docs] class XPM(utilities.FileUtils): """Class to make a Gromacs XPM matrix available as a NumPy :class:`numpy.ndarray`. The data is available in the attribute :attr:`XPM.array`. .. Note:: By default, the rows (2nd dimension) in the :attr:`XPM.array` are re-ordered so that row 0 (i.e. ``array[:,0]`` corresponds to the first residue/hydrogen bond/etc. The original xpm matrix is obtained for *reverse* = ``False``. The :class:`XPM` reader always reorders the :attr:`XPM.yvalues` (obtained from the xpm file) to match the order of the rows. """ default_extension = "xpm" logger = logging.getLogger("gromacs.formats.XPM") #: compiled regular expression to parse the colors in the xpm file:: #: #: static char *gromacs_xpm[] = { #: "14327 9 2 1", #: " c #FFFFFF " /* "None" */, #: "o c #FF0000 " /* "Present" */, #: #: Matches are named "symbol", "color" (hex string), and "value". "value" #: is typically autoconverted to appropriate values with #: :class:`gromacs.fileformats.convert.Autoconverter`. #: The symbol is matched as a `printable ASCII character`_ in the range #: 0x20 (space) to 0x7E (~). #: #: .. _`printable ASCII character`: http://www.danshort.com/ASCIImap/indexhex.htm COLOUR = re.compile( r""" ^.*" # start with quotation mark (?P<symbol>[\x20-\x7E])# printable ASCII symbol used in the actual pixmap: 'space' to '~' \s+ # white-space separated c\s+ # 'c' to prefix colour?? (?P<color>\#[0-9A-F]+) # colour as hex string (always??) \s*" # close with quotes \s*/\*\s* # white space then opening C-comment /* " # start new string (?P<value>.*) # description/value as free form string " # ... terminated by quotes """, re.VERBOSE, ) def __init__(self, filename=None, **kwargs): """Initialize xpm structure. :Arguments: *filename* read from xpm file directly *autoconvert* try to guess the type of the output array from the colour legend [``True``] *reverse* reverse rows (2nd dimension): re-orders the rows so that the first row corresponds e.g. to the first residue or first H-bonds and not the last) [``True``] """ self.autoconvert = kwargs.pop("autoconvert", True) self.reverse = kwargs.pop("reverse", True) self.__array = None super(XPM, self).__init__( **kwargs ) # can use kwargs to set dict! (but no sanity checks!) if filename is not None: self._init_filename(filename) self.read(filename) def to_df(self): import pandas as pd # Add Time to the data as column data = numpy.vstack((self.xvalues, self.array.T)).T # Column names are resids df = pd.DataFrame(data, columns=["Time"] + list(self.yvalues)) # Converts Time to a numeric type df["Time"] = pd.to_numeric(df["Time"]) return df @property def array(self): """XPM matrix as a :class:`numpy.ndarray`. The attribute itself cannot be assigned a different array but the contents of the array can be modified. """ return self.__array
[docs] def read(self, filename=None): """Read and parse mdp file *filename*.""" self._init_filename(filename) self.parse()
[docs] def parse(self): """Parse the xpm file and populate :attr:`XPM.array`.""" with utilities.openany(self.real_filename) as xpm: # Read in lines until we find the start of the array meta = [xpm.readline()] while not meta[-1].startswith("static char *gromacs_xpm[]"): meta.append(xpm.readline()) # The next line will contain the dimensions of the array dim = xpm.readline() # There are four integers surrounded by quotes # nx: points along x, ny: points along y, nc: ?, nb: stride x nx, ny, nc, nb = [int(i) for i in self.unquote(dim).split()] # The next dim[2] lines contain the color definitions # Each pixel is encoded by dim[3] bytes, and a comment # at the end of the line contains the corresponding value colors = dict([self.col(xpm.readline()) for i in range(nc)]) if self.autoconvert: autoconverter = Autoconverter(mode="singlet") for symbol, value in colors.items(): colors[symbol] = autoconverter.convert(value) self.logger.debug("Autoconverted colours: %r", colors) # make an array containing all possible values and let numpy figure out the dtype dtype = numpy.array(colors.values()).dtype self.logger.debug("Guessed array type: %s", dtype.name) # pre-allocate array data = numpy.zeros((int(nx / nb), ny), dtype=dtype) self.logger.debug( "dimensions: NX=%d NY=%d strideX=%d (NC=%d) --> (%d, %d)", nx, ny, nb, nc, nx / nb, ny, ) iy = 0 xval = [] yval = [] autoconverter = Autoconverter(mode="singlet") for line in xpm: if line.startswith("/*"): # lines '/* x-axis:' ... and '/* y-axis:' contain the # values of x and y coordinates s = self.uncomment(line).strip() if s.startswith("x-axis:"): xval.extend([autoconverter.convert(x) for x in s[7:].split()]) elif s.startswith("y-axis:"): yval.extend([autoconverter.convert(y) for y in s[7:].split()]) continue s = self.unquote(line) # Joao M. Damas <jmdamas@itqb.unl.pt> suggests on gmx-users (24 Oct 2014) # that the next line should read: # # data[:, iy] = [colors[j[k:k+nb]] for k in range(0,nx*nb,nb)] # # "if one is using higher -nlevels for the .xpm construction (in g_rms, for example)" # However, without a test case I am not eager to change it right away so in # case some faulty behavior is discovered with the XPM reader then this comment # might be helpful. --- Oliver 2014-10-25 data[:, iy] = [colors[s[k : k + nb]] for k in range(0, nx, nb)] self.logger.debug( "read row %d with %d columns: '%s....%s'", iy, data.shape[0], s[:4], s[-4:], ) iy += 1 # for next row self.xvalues = numpy.array(xval) if self.reverse: self.logger.debug("reversed row order, reverse=%r", self.reverse) self.__array = data[:, ::-1] self.yvalues = numpy.array(yval) else: self.__array = data self.yvalues = numpy.array(yval)[::-1] # must reverse y-values to match!
[docs] @staticmethod def unquote(s): """Return string *s* with quotes ``"`` removed.""" return s[1 + s.find('"') : s.rfind('"')]
[docs] @staticmethod def uncomment(s): """Return string *s* with C-style comments ``/*`` ... ``*/`` removed.""" return s[2 + s.find("/*") : s.rfind("*/")]
[docs] def col(self, c): """Parse colour specification""" m = self.COLOUR.search(c) if not m: self.logger.fatal("Cannot parse colour specification %r.", c) raise ParseError( "XPM reader: Cannot parse colour specification {0!r}.".format(c) ) value = m.group("value") color = m.group("symbol") self.logger.debug("%s: %s %s\n", c.strip(), color, value) return color, value