# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
#
# MDAnalysis --- http://www.MDAnalysis.org
# Copyright (c) 2006-2015 Naveen Michaud-Agrawal, Elizabeth J. Denning, Oliver Beckstein
# and contributors (see AUTHORS for the full list)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#
"""XYZ trajectory reader --- :mod:`MDAnalysis.coordinates.XYZ`
==============================================================
The :ref:`XYZ format <xyz-format>` is a loosely defined, simple
coordinate trajectory format. The implemented format definition was
taken from the `VMD xyzplugin`_ and is therefore compatible with VMD.
Note the following:
* Comments are not allowed in the XYZ file (we neither read nor write
them to remain compatible with VMD).
* The atom name (first column) is ignored during reading.
* The coordinates are assumed to be space-delimited rather than fixed
width (this may cause issues - see below).
* All fields to the right of the z-coordinate are ignored.
* The unitcell information is all zeros since this is not recorded in
the XYZ format.
.. rubric:: Units
* Coordinates are in Angstroms.
* The length of a timestep can be set by passing the *dt* argument,
it's assumed to be in ps (default: 1 ps).
There appears to be no rigid format definition so it is likely users
will need to tweak this class.
.. _xyz-format:
XYZ File format
---------------
Definiton used by the :class:`XYZReader` and :class:`XYZWriter` (and
the `VMD xyzplugin`_ from whence the definition was taken)::
[ comment line ] !! NOT IMPLEMENTED !! DO NOT INCLUDE
[ N ] # of atoms, required by this xyz reader plugin line 1
[ molecule name ] name of molecule (can be blank) line 2
atom1 x y z [optional data] atom name followed by xyz coords line 3
atom2 x y z [ ... ] and (optionally) other data.
...
atomN x y z [ ... ] line N+2
.. Note::
* comment lines not implemented (do not include them)
* molecule name: the line is required but the content is ignored
at the moment
* optional data (after the coordinates) are presently ignored
.. Links
.. _`VMD xyzplugin`: http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html
"""
import os
import errno
import numpy as np
import itertools
from . import base
from ..core import flags
from ..lib import util
from ..lib.util import cached
from ..exceptions import NoDataError
from ..version import __version__
[docs]class XYZWriter(base.Writer):
"""Writes an XYZ file
The XYZ file format is not formally defined. This writer follows
the VMD implementation for the molfile `xyzplugin`_.
.. _xyzplugin:
http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html
"""
format = 'XYZ'
# these are assumed!
units = {'time': 'ps', 'length': 'Angstrom'}
def __init__(self, *args, **kwargs):
"""Initialize the XYZ trajectory writer
:Arguments:
*filename*
file name of trajectory file. If it ends with "gz" then the file
will be gzip-compressed; if it ends with "bz2" it will be bzip2
compressed.
:Keywords:
*atoms*
Provide atom names: This can be a list of names or an
:class:`AtomGroup`. If none is provided, atoms will
be called 'X' in the output. These atom names will be
used when a trajectory is written from raw
:class:`Timestep` objects which do not contain atom
information.
If you write a :class:`AtomGroup` with
:meth:`XYZWriter.write` then atom information is taken
at each step and *atoms* is ignored.
*remark*
single line of text ("molecule name")
"""
# n_atoms is ignored ...
self.filename = args[0]
# convert length and time to base units on the fly?
convert_units = kwargs.pop('convert_units', None)
self.convert_units = convert_units if convert_units is not None else flags['convert_lengths']
self.atomnames = self._get_atomnames(kwargs.pop('atoms', "X"))
self.remark = kwargs.pop('remark',
"Written by {0} (release {1})".format(self.__class__.__name__, __version__))
self.xyz = util.anyopen(self.filename, 'w') # can also be gz, bz2
def _get_atomnames(self, atoms):
"""Return a list of atom names"""
# AtomGroup
try:
return atoms.names
except AttributeError:
pass
# universe?
try:
return atoms.atoms.names
except AttributeError:
pass
# list or string (can be a single atom name... deal with this in write_next_timestep() once we know n_atoms)
return np.asarray(util.asiterable(atoms))
[docs] def close(self):
"""Close the trajectory file and finalize the writing"""
if self.xyz is not None:
self.xyz.write("\n")
self.xyz.close()
self.xyz = None
[docs] def write(self, obj):
"""Write object *obj* at current trajectory frame to file.
*obj* can be a :class:`~MDAnalysis.core.AtomGroup.AtomGroup`)
or a whole :class:`~MDAnalysis.core.AtomGroup.Universe`.
Atom names in the output are taken from the *obj* or default
to the value of the *atoms* keyword supplied to the
:class:`XYZWriter` constructor.
:Arguments:
*obj*
:class:`~MDAnalysis.core.AtomGroup.AtomGroup` or
:class:`~MDAnalysis.core.AtomGroup.Universe`
"""
# prepare the Timestep and extract atom names if possible
# (The way it is written it should be possible to write
# trajectories with frames that differ in atom numbers
# but this is not tested.)
try:
atoms = obj.atoms
except AttributeError:
atoms = None
if atoms: # have a AtomGroup
if hasattr(obj, 'universe'):
# For AtomGroup and children (Residue, ResidueGroup, Segment)
ts_full = obj.universe.trajectory.ts
if ts_full.n_atoms == atoms.n_atoms:
ts = ts_full
else:
# Only populate a time step with the selected atoms.
ts = ts_full.copy_slice(atoms.indices)
elif hasattr(obj, 'trajectory'):
# For Universe only --- get everything
ts = obj.trajectory.ts
# update atom names
self.atomnames = atoms.names
else:
ts = obj
self.write_next_timestep(ts)
[docs] def write_next_timestep(self, ts=None):
"""Write coordinate information in *ts* to the trajectory"""
if ts is None:
if not hasattr(self, "ts"):
raise NoDataError("XYZWriter: no coordinate data to write to trajectory file")
else:
ts = self.ts
if len(self.atomnames) != ts.n_atoms:
self.atomnames = np.array([self.atomnames[0]] * ts.n_atoms)
if self.convert_units:
coordinates = self.convert_pos_to_native(ts._pos, inplace=False)
else:
coordinates = ts._pos
self.xyz.write("{0:d}\n".format(ts.n_atoms))
self.xyz.write("frame {0}\n".format(ts.frame))
for atom, (x, y, z) in itertools.izip(self.atomnames, coordinates):
self.xyz.write("%8s %10.5f %10.5f %10.5f\n" % (atom, x, y, z))
[docs]class XYZReader(base.Reader):
"""Reads from an XYZ file
:Data:
:attr:`ts`
Timestep object containing coordinates of current frame
:Methods:
``len(xyz)``
return number of frames in xyz
``for ts in xyz:``
iterate through trajectory
.. Note: this can read both compressed (foo.xyz) and compressed
(foo.xyz.bz2 or foo.xyz.gz) files; uncompression is handled
on the fly and also reads streams via
:class:`~MDAnalysis.lib.util.NamedStream`.
The XYZ file format follows VMD's xyzplugin_ and is also described
under :ref:`XYZ format <xyz-format>`.
.. versionchanged:: 0.11.0
Frames now 0-based instead of 1-based. Added *dt* and
*time_offset* keywords (passed to :class:`Timestep`)
"""
# Phil Fowler:
# Validation: the geometric centre of 1284 atoms was calculated over
# 500 frames using both MDAnalysis and a VMD Tcl script. There was
# exact agreement (measured to three decimal places). bzipped and
# gzipped versions of the XYZ file were also tested
format = "XYZ"
# these are assumed!
units = {'time': 'ps', 'length': 'Angstrom'}
_Timestep = base.Timestep
def __init__(self, filename, **kwargs):
super(XYZReader, self).__init__(filename, **kwargs)
# the filename has been parsed to be either be foo.xyz or foo.xyz.bz2 by
# coordinates::core.py so the last file extension will tell us if it is
# bzipped or not
root, ext = os.path.splitext(self.filename)
self.xyzfile = util.anyopen(self.filename, "r")
self.compression = ext[1:] if ext[1:] != "xyz" else None
self._cache = dict()
self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
# Haven't quite figured out where to start with all the self._reopen()
# etc.
# (Also cannot just use seek() or reset() because that would break
# with urllib2.urlopen() streams)
self._read_next_timestep()
@property
@cached('n_atoms')
def n_atoms(self):
"""number of atoms in a frame"""
with util.anyopen(self.filename, 'r') as f:
n = f.readline()
# need to check type of n
return int(n)
@property
@cached('n_frames')
def n_frames(self):
try:
return self._read_xyz_n_frames()
except IOError:
return 0
def _read_xyz_n_frames(self):
# the number of lines in the XYZ file will be 2 greater than the
# number of atoms
linesPerFrame = self.n_atoms + 2
counter = 0
offsets = []
with util.anyopen(self.filename, 'r') as f:
line = True
while line:
if not counter % linesPerFrame:
offsets.append(f.tell())
line = f.readline()
counter += 1
# need to check this is an integer!
n_frames = int(counter / linesPerFrame)
self._offsets = offsets
return n_frames
def _read_frame(self, frame):
self.xyzfile.seek(self._offsets[frame])
self.ts.frame = frame - 1 # gets +1'd in next
return self._read_next_timestep()
def _read_next_timestep(self, ts=None):
# check that the timestep object exists
if ts is None:
ts = self.ts
f = self.xyzfile
try:
# we assume that there are only two header lines per frame
f.readline()
f.readline()
for i in xrange(self.n_atoms):
self.ts._pos[i] = map(float, f.readline().split()[1:4])
ts.frame += 1
return ts
except (ValueError, IndexError) as err:
raise EOFError(err)
[docs] def rewind(self):
"""reposition on first frame"""
self._reopen()
# the next method calls _read_next_timestep
self.next()
def _reopen(self):
self.close()
self.open_trajectory()
def open_trajectory(self):
if self.xyzfile is not None:
raise IOError(
errno.EALREADY, 'XYZ file already opened', self.filename)
self.xyzfile = util.anyopen(self.filename, "r")
# reset ts
ts = self.ts
ts.frame = -1
return self.xyzfile
[docs] def Writer(self, filename, **kwargs):
"""Returns a XYZWriter for *filename* with the same parameters as this XYZ.
All values can be changed through keyword arguments.
:Arguments:
*filename*
filename of the output DCD trajectory
:Keywords:
*atoms*
names of the atoms (if not taken from atom groups)
:Returns: :class:`XYZWriter` (see there for more details)
"""
return XYZWriter(filename, **kwargs)
[docs] def close(self):
"""Close xyz trajectory file if it was open."""
if self.xyzfile is None:
return
self.xyzfile.close()
self.xyzfile = None